diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index a9f757c3e2413..e2eb0683b6e59 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -4,6 +4,9 @@ on: push: branches: - master + pull_request: + branches: + - master jobs: build: @@ -12,16 +15,46 @@ jobs: strategy: matrix: java: [ '1.8', '11' ] - name: Build Spark with JDK ${{ matrix.java }} + hadoop: [ 'hadoop-2.7', 'hadoop-3.2' ] + exclude: + - java: '11' + hadoop: 'hadoop-2.7' + name: Build Spark with JDK ${{ matrix.java }} and ${{ matrix.hadoop }} steps: - uses: actions/checkout@master - name: Set up JDK ${{ matrix.java }} uses: actions/setup-java@v1 with: - version: ${{ matrix.java }} + java-version: ${{ matrix.java }} - name: Build with Maven run: | - export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" export MAVEN_CLI_OPTS="--no-transfer-progress" - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-3.2 -Phadoop-cloud -Djava.version=${{ matrix.java }} package + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -P${{ matrix.hadoop }} -Phadoop-cloud -Djava.version=${{ matrix.java }} package + + + lint: + runs-on: ubuntu-latest + name: Linters + steps: + - uses: actions/checkout@master + - uses: actions/setup-java@v1 + with: + java-version: '11' + - uses: actions/setup-python@v1 + with: + python-version: '3.x' + architecture: 'x64' + - name: Scala + run: ./dev/lint-scala + - name: Java + run: ./dev/lint-java + - name: Python + run: | + pip install flake8 sphinx numpy + ./dev/lint-python + - name: License + run: ./dev/check-license + - name: Dependencies + run: ./dev/test-dependencies.sh diff --git a/LICENSE-binary b/LICENSE-binary index ba20eea118687..d2a189a3fca11 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -218,6 +218,7 @@ javax.jdo:jdo-api joda-time:joda-time net.sf.opencsv:opencsv org.apache.derby:derby +org.ehcache:ehcache org.objenesis:objenesis org.roaringbitmap:RoaringBitmap org.scalanlp:breeze-macros_2.12 @@ -259,6 +260,7 @@ net.sf.supercsv:super-csv org.apache.arrow:arrow-format org.apache.arrow:arrow-memory org.apache.arrow:arrow-vector +org.apache.commons:commons-configuration2 org.apache.commons:commons-crypto org.apache.commons:commons-lang3 org.apache.hadoop:hadoop-annotations @@ -266,6 +268,7 @@ org.apache.hadoop:hadoop-auth org.apache.hadoop:hadoop-client org.apache.hadoop:hadoop-common org.apache.hadoop:hadoop-hdfs +org.apache.hadoop:hadoop-hdfs-client org.apache.hadoop:hadoop-mapreduce-client-app org.apache.hadoop:hadoop-mapreduce-client-common org.apache.hadoop:hadoop-mapreduce-client-core @@ -278,6 +281,21 @@ org.apache.hadoop:hadoop-yarn-server-common org.apache.hadoop:hadoop-yarn-server-web-proxy org.apache.httpcomponents:httpclient org.apache.httpcomponents:httpcore +org.apache.kerby:kerb-admin +org.apache.kerby:kerb-client +org.apache.kerby:kerb-common +org.apache.kerby:kerb-core +org.apache.kerby:kerb-crypto +org.apache.kerby:kerb-identity +org.apache.kerby:kerb-server +org.apache.kerby:kerb-simplekdc +org.apache.kerby:kerb-util +org.apache.kerby:kerby-asn1 +org.apache.kerby:kerby-config +org.apache.kerby:kerby-pkix +org.apache.kerby:kerby-util +org.apache.kerby:kerby-xdr +org.apache.kerby:token-provider org.apache.orc:orc-core org.apache.orc:orc-mapreduce org.mortbay.jetty:jetty @@ -292,14 +310,19 @@ 
com.fasterxml.jackson.core:jackson-annotations com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.dataformat:jackson-dataformat-yaml +com.fasterxml.jackson.jaxrs:jackson-jaxrs-base +com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider com.fasterxml.jackson.module:jackson-module-jaxb-annotations com.fasterxml.jackson.module:jackson-module-paranamer com.fasterxml.jackson.module:jackson-module-scala_2.12 +com.fasterxml.woodstox:woodstox-core com.github.mifmif:generex +com.github.stephenc.jcip:jcip-annotations com.google.code.findbugs:jsr305 com.google.code.gson:gson com.google.inject:guice com.google.inject.extensions:guice-servlet +com.nimbusds:nimbus-jose-jwt com.twitter:parquet-hadoop-bundle commons-cli:commons-cli commons-dbcp:commons-dbcp @@ -313,6 +336,8 @@ javax.inject:javax.inject javax.validation:validation-api log4j:apache-log4j-extras log4j:log4j +net.minidev:accessors-smart +net.minidev:json-smart net.sf.jpam:jpam org.apache.avro:avro org.apache.avro:avro-ipc @@ -328,6 +353,7 @@ org.apache.directory.server:apacheds-i18n org.apache.directory.server:apacheds-kerberos-codec org.apache.htrace:htrace-core org.apache.ivy:ivy +org.apache.geronimo.specs:geronimo-jcache_1.0_spec org.apache.mesos:mesos org.apache.parquet:parquet-column org.apache.parquet:parquet-common @@ -369,6 +395,20 @@ org.eclipse.jetty:jetty-webapp org.eclipse.jetty:jetty-xml org.scala-lang.modules:scala-xml_2.12 org.opencypher:okapi-shade +com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter +com.zaxxer.HikariCP +org.apache.hive:hive-common +org.apache.hive:hive-llap-common +org.apache.hive:hive-serde +org.apache.hive:hive-service-rpc +org.apache.hive:hive-shims-0.23 +org.apache.hive:hive-shims +org.apache.hive:hive-shims-scheduler +org.apache.hive:hive-storage-api +org.apache.hive:hive-vector-code-gen +org.datanucleus:javax.jdo +com.tdunning:json +org.apache.velocity:velocity core/src/main/java/org/apache/spark/util/collection/TimSort.java core/src/main/resources/org/apache/spark/ui/static/bootstrap* @@ -387,6 +427,7 @@ BSD 2-Clause ------------ com.github.luben:zstd-jni +dnsjava:dnsjava javolution:javolution com.esotericsoftware:kryo-shaded com.esotericsoftware:minlog @@ -394,8 +435,11 @@ com.esotericsoftware:reflectasm com.google.protobuf:protobuf-java org.codehaus.janino:commons-compiler org.codehaus.janino:janino +org.codehaus.woodstox:stax2-api jline:jline org.jodd:jodd-core +com.github.wendykierp:JTransforms +pl.edu.icm:JLargeArrays BSD 3-Clause @@ -408,6 +452,7 @@ org.antlr:stringtemplate org.antlr:antlr4-runtime antlr:antlr com.github.fommil.netlib:core +com.google.re2j:re2j com.thoughtworks.paranamer:paranamer org.scala-lang:scala-compiler org.scala-lang:scala-library @@ -433,8 +478,13 @@ is distributed under the 3-Clause BSD license. 
MIT License ----------- -org.spire-math:spire-macros_2.12 -org.spire-math:spire_2.12 +com.microsoft.sqlserver:mssql-jdbc +org.typelevel:spire_2.12 +org.typelevel:spire-macros_2.12 +org.typelevel:spire-platform_2.12 +org.typelevel:spire-util_2.12 +org.typelevel:algebra_2.12:jar +org.typelevel:cats-kernel_2.12 org.typelevel:machinist_2.12 net.razorvine:pyrolite org.slf4j:jcl-over-slf4j @@ -458,6 +508,7 @@ Common Development and Distribution License (CDDL) 1.0 javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173 +javax.transaction:javax.transaction-api Common Development and Distribution License (CDDL) 1.1 @@ -496,11 +547,6 @@ Eclipse Public License (EPL) 2.0 jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api -Mozilla Public License (MPL) 1.1 --------------------------------- - -com.github.rwl:jtransforms https://sourceforge.net/projects/jtransforms/ - Python Software Foundation License ---------------------------------- diff --git a/NOTICE-binary b/NOTICE-binary index f93e088a9a731..80ddfd10a1874 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -1135,4 +1135,356 @@ Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and -limitations under the License. \ No newline at end of file +limitations under the License. + +dropwizard-metrics-hadoop-metrics2-reporter +Copyright 2016 Josh Elser + +Hive Common +Copyright 2019 The Apache Software Foundation + +Hive Llap Common +Copyright 2019 The Apache Software Foundation + +Hive Serde +Copyright 2019 The Apache Software Foundation + +Hive Service RPC +Copyright 2019 The Apache Software Foundation + +Hive Shims 0.23 +Copyright 2019 The Apache Software Foundation + +Hive Shims Common +Copyright 2019 The Apache Software Foundation + +Hive Shims Scheduler +Copyright 2019 The Apache Software Foundation + +Hive Storage API +Copyright 2018 The Apache Software Foundation + +Hive Vector-Code-Gen Utilities +Copyright 2019 The Apache Software Foundation + + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + Copyright 2015-2015 DataNucleus + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +Apache Velocity + +Copyright (C) 2000-2007 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Apache Yetus - Audience Annotations +Copyright 2015-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Ehcache V3 +Copyright 2014-2016 Terracotta, Inc. + +The product includes software from the Apache Commons Lang project, +under the Apache License 2.0 (see: org.ehcache.impl.internal.classes.commonslang) + +Apache Geronimo JCache Spec 1.0 +Copyright 2003-2014 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Admin +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Client +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Common +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb core +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ + +Kerby-kerb Crypto +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Identity +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Server +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerb Simple Kdc +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby-kerb Util +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby ASN1 Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby Config +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby PKIX Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby Util +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Kerby XDR Project +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + + +Token provider +Copyright 2014-2017 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). diff --git a/R/check-cran.sh b/R/check-cran.sh index 22cc9c6b601fc..22c8f423cfd12 100755 --- a/R/check-cran.sh +++ b/R/check-cran.sh @@ -65,6 +65,10 @@ fi echo "Running CRAN check with $CRAN_CHECK_OPTIONS options" +# Remove this environment variable to allow to check suggested packages once +# Jenkins installs arrow. See SPARK-29339. +export _R_CHECK_FORCE_SUGGESTS_=FALSE + if [ -n "$NO_TESTS" ] && [ -n "$NO_MANUAL" ] then "$R_SCRIPT_PATH/R" CMD check $CRAN_CHECK_OPTIONS "SparkR_$VERSION.tar.gz" diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index f4780862099d3..95d3e52bef3a9 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -22,7 +22,8 @@ Suggests: rmarkdown, testthat, e1071, - survival + survival, + arrow Collate: 'schema.R' 'generics.R' diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 43ea27b359a9c..f27ef4ee28f16 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -148,19 +148,7 @@ getDefaultSqlSource <- function() { } writeToFileInArrow <- function(fileName, rdf, numPartitions) { - requireNamespace1 <- requireNamespace - - # R API in Arrow is not yet released in CRAN. CRAN requires to add the - # package in requireNamespace at DESCRIPTION. Later, CRAN checks if the package is available - # or not. Therefore, it works around by avoiding direct requireNamespace. - # Currently, as of Arrow 0.12.0, it can be installed by install_github. See ARROW-3204. 
- if (requireNamespace1("arrow", quietly = TRUE)) { - record_batch <- get("record_batch", envir = asNamespace("arrow"), inherits = FALSE) - RecordBatchStreamWriter <- get( - "RecordBatchStreamWriter", envir = asNamespace("arrow"), inherits = FALSE) - FileOutputStream <- get( - "FileOutputStream", envir = asNamespace("arrow"), inherits = FALSE) - + if (requireNamespace("arrow", quietly = TRUE)) { numPartitions <- if (!is.null(numPartitions)) { numToInt(numPartitions) } else { @@ -176,11 +164,11 @@ writeToFileInArrow <- function(fileName, rdf, numPartitions) { stream_writer <- NULL tryCatch({ for (rdf_slice in rdf_slices) { - batch <- record_batch(rdf_slice) + batch <- arrow::record_batch(rdf_slice) if (is.null(stream_writer)) { - stream <- FileOutputStream(fileName) + stream <- arrow::FileOutputStream(fileName) schema <- batch$schema - stream_writer <- RecordBatchStreamWriter(stream, schema) + stream_writer <- arrow::RecordBatchStreamWriter(stream, schema) } stream_writer$write_batch(batch) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index 51ae2d2954a9a..93ba1307043a3 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -301,7 +301,7 @@ broadcastRDD <- function(sc, object) { #' Set the checkpoint directory #' #' Set the directory under which RDDs are going to be checkpointed. The -#' directory must be a HDFS path if running on a cluster. +#' directory must be an HDFS path if running on a cluster. #' #' @param sc Spark Context to use #' @param dirName Directory path @@ -446,7 +446,7 @@ setLogLevel <- function(level) { #' Set checkpoint directory #' #' Set the directory under which SparkDataFrame are going to be checkpointed. The directory must be -#' a HDFS path if running on a cluster. +#' an HDFS path if running on a cluster. #' #' @rdname setCheckpointDir #' @param directory Directory path to checkpoint to diff --git a/R/pkg/R/deserialize.R b/R/pkg/R/deserialize.R index b38d245a0cca7..a6febb1cbd132 100644 --- a/R/pkg/R/deserialize.R +++ b/R/pkg/R/deserialize.R @@ -232,11 +232,7 @@ readMultipleObjectsWithKeys <- function(inputCon) { } readDeserializeInArrow <- function(inputCon) { - # This is a hack to avoid CRAN check. Arrow is not uploaded into CRAN now. See ARROW-3204. - requireNamespace1 <- requireNamespace - if (requireNamespace1("arrow", quietly = TRUE)) { - RecordBatchStreamReader <- get( - "RecordBatchStreamReader", envir = asNamespace("arrow"), inherits = FALSE) + if (requireNamespace("arrow", quietly = TRUE)) { # Arrow drops `as_tibble` since 0.14.0, see ARROW-5190. useAsTibble <- exists("as_tibble", envir = asNamespace("arrow")) @@ -246,7 +242,7 @@ readDeserializeInArrow <- function(inputCon) { # for now. dataLen <- readInt(inputCon) arrowData <- readBin(inputCon, raw(), as.integer(dataLen), endian = "big") - batches <- RecordBatchStreamReader(arrowData)$batches() + batches <- arrow::RecordBatchStreamReader(arrowData)$batches() if (useAsTibble) { as_tibble <- get("as_tibble", envir = asNamespace("arrow")) diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R index eecb84572a30b..eec221c2be4bf 100644 --- a/R/pkg/R/functions.R +++ b/R/pkg/R/functions.R @@ -3617,7 +3617,7 @@ setMethod("size", #' @details #' \code{slice}: Returns an array containing all the elements in x from the index start -#' (or starting from the end if start is negative) with the specified length. +#' (array indices start at 1, or from the end if start is negative) with the specified length. 
#' #' @rdname column_collection_functions #' @param start an index indicating the first element occurring in the result. diff --git a/R/pkg/R/mllib_recommendation.R b/R/pkg/R/mllib_recommendation.R index 9a77b07462585..d238ff93ed245 100644 --- a/R/pkg/R/mllib_recommendation.R +++ b/R/pkg/R/mllib_recommendation.R @@ -82,6 +82,12 @@ setClass("ALSModel", representation(jobj = "jobj")) #' statsS <- summary(modelS) #' } #' @note spark.als since 2.1.0 +#' @note the input rating dataframe to the ALS implementation should be deterministic. +#' Nondeterministic data can cause failure during fitting ALS model. For example, +#' an order-sensitive operation like sampling after a repartition makes dataframe output +#' nondeterministic, like \code{sample(repartition(df, 2L), FALSE, 0.5, 1618L)}. +#' Checkpointing sampled dataframe or adding a sort before sampling can help make the +#' dataframe deterministic. setMethod("spark.als", signature(data = "SparkDataFrame"), function(data, ratingCol = "rating", userCol = "user", itemCol = "item", rank = 10, regParam = 0.1, maxIter = 10, nonnegative = FALSE, diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R index 0d6f32c8f7e1f..cb3c1c59d12ed 100644 --- a/R/pkg/R/serialize.R +++ b/R/pkg/R/serialize.R @@ -222,15 +222,11 @@ writeArgs <- function(con, args) { } writeSerializeInArrow <- function(conn, df) { - # This is a hack to avoid CRAN check. Arrow is not uploaded into CRAN now. See ARROW-3204. - requireNamespace1 <- requireNamespace - if (requireNamespace1("arrow", quietly = TRUE)) { - write_arrow <- get("write_arrow", envir = asNamespace("arrow"), inherits = FALSE) - + if (requireNamespace("arrow", quietly = TRUE)) { # There looks no way to send each batch in streaming format via socket # connection. See ARROW-4512. # So, it writes the whole Arrow streaming-formatted binary at once for now. 
- writeRaw(conn, write_arrow(df, raw())) + writeRaw(conn, arrow::write_arrow(df, raw())) } else { stop("'arrow' package should be installed.") } diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 31b986c326d0c..cdb59093781fb 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -266,11 +266,12 @@ sparkR.sparkContext <- function( #' df <- read.json(path) #' #' sparkR.session("local[2]", "SparkR", "/home/spark") -#' sparkR.session("yarn-client", "SparkR", "/home/spark", -#' list(spark.executor.memory="4g"), +#' sparkR.session("yarn", "SparkR", "/home/spark", +#' list(spark.executor.memory="4g", spark.submit.deployMode="client"), #' c("one.jar", "two.jar", "three.jar"), #' c("com.databricks:spark-avro_2.12:2.0.1")) -#' sparkR.session(spark.master = "yarn-client", spark.executor.memory = "4g") +#' sparkR.session(spark.master = "yarn", spark.submit.deployMode = "client", +# spark.executor.memory = "4g") #'} #' @note sparkR.session since 2.0.0 sparkR.session <- function( diff --git a/R/pkg/inst/worker/worker.R b/R/pkg/inst/worker/worker.R index 80dc4ee634512..dfe69b7f4f1fb 100644 --- a/R/pkg/inst/worker/worker.R +++ b/R/pkg/inst/worker/worker.R @@ -50,7 +50,7 @@ compute <- function(mode, partition, serializer, deserializer, key, } else { # Check to see if inputData is a valid data.frame stopifnot(deserializer == "byte" || deserializer == "arrow") - stopifnot(class(inputData) == "data.frame") + stopifnot(is.data.frame(inputData)) } if (mode == 2) { diff --git a/R/pkg/tests/fulltests/test_sparkR.R b/R/pkg/tests/fulltests/test_sparkR.R index f73fc6baeccef..4232f5ec430f6 100644 --- a/R/pkg/tests/fulltests/test_sparkR.R +++ b/R/pkg/tests/fulltests/test_sparkR.R @@ -36,8 +36,8 @@ test_that("sparkCheckInstall", { # "yarn-client, mesos-client" mode, SPARK_HOME was not set sparkHome <- "" - master <- "yarn-client" - deployMode <- "" + master <- "yarn" + deployMode <- "client" expect_error(sparkCheckInstall(sparkHome, master, deployMode)) sparkHome <- "" master <- "" diff --git a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R index 825c7423e1579..97972753a78fa 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL_arrow.R +++ b/R/pkg/tests/fulltests/test_sparkSQL_arrow.R @@ -101,7 +101,7 @@ test_that("dapply() Arrow optimization", { tryCatch({ ret <- dapply(df, function(rdf) { - stopifnot(class(rdf) == "data.frame") + stopifnot(is.data.frame(rdf)) rdf }, schema(df)) @@ -115,7 +115,7 @@ test_that("dapply() Arrow optimization", { tryCatch({ ret <- dapply(df, function(rdf) { - stopifnot(class(rdf) == "data.frame") + stopifnot(is.data.frame(rdf)) # mtcars' hp is more then 50. stopifnot(all(rdf$hp > 50)) rdf @@ -199,7 +199,7 @@ test_that("gapply() Arrow optimization", { if (length(key) > 0) { stopifnot(is.numeric(key[[1]])) } - stopifnot(class(grouped) == "data.frame") + stopifnot(is.data.frame(grouped)) grouped }, schema(df)) @@ -217,7 +217,7 @@ test_that("gapply() Arrow optimization", { if (length(key) > 0) { stopifnot(is.numeric(key[[1]])) } - stopifnot(class(grouped) == "data.frame") + stopifnot(is.data.frame(grouped)) stopifnot(length(colnames(grouped)) == 11) # mtcars' hp is more then 50. 
stopifnot(all(grouped$hp > 50)) diff --git a/appveyor.yml b/appveyor.yml index a61436c5d2e68..00c688ba18eb6 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -42,13 +42,13 @@ install: # Install maven and dependencies - ps: .\dev\appveyor-install-dependencies.ps1 # Required package for R unit tests - - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" + - cmd: R -e "install.packages(c('knitr', 'rmarkdown', 'e1071', 'survival', 'arrow'), repos='https://cloud.r-project.org/')" # Here, we use the fixed version of testthat. For more details, please see SPARK-22817. # As of devtools 2.1.0, it requires testthat higher then 2.1.1 as a dependency. SparkR test requires testthat 1.0.2. # Therefore, we don't use devtools but installs it directly from the archive including its dependencies. - cmd: R -e "install.packages(c('crayon', 'praise', 'R6'), repos='https://cloud.r-project.org/')" - cmd: R -e "install.packages('https://cloud.r-project.org/src/contrib/Archive/testthat/testthat_1.0.2.tar.gz', repos=NULL, type='source')" - - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival')" + - cmd: R -e "packageVersion('knitr'); packageVersion('rmarkdown'); packageVersion('testthat'); packageVersion('e1071'); packageVersion('survival'); packageVersion('arrow')" build_script: # '-Djna.nosys=true' is required to avoid kernel32.dll load failure. diff --git a/build/mvn b/build/mvn index f68377b3ddc71..3628be9880253 100755 --- a/build/mvn +++ b/build/mvn @@ -22,7 +22,7 @@ _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" # Preserve the calling directory _CALLING_DIR="$(pwd)" # Options used during compilation -_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m" +_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g" # Installs any application tarball given a URL, the expected tarball name, # and, optionally, a checkable binary path to determine if the binary has diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java index 6af45aec3c7b2..b33c53871c32f 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java @@ -252,7 +252,7 @@ private static Predicate getPredicate( return (value) -> set.contains(indexValueForEntity(getter, value)); } else { - HashSet set = new HashSet<>(values.size()); + HashSet> set = new HashSet<>(values.size()); for (Object key : values) { set.add(asKey(key)); } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java index b8c5fab8709ed..d2a26982d8703 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java @@ -124,7 +124,7 @@ interface Accessor { Object get(Object instance) throws ReflectiveOperationException; - Class getType(); + Class getType(); } private class FieldAccessor implements Accessor { @@ -141,7 +141,7 @@ public Object get(Object instance) throws ReflectiveOperationException { } @Override - public Class getType() { + public Class getType() { return field.getType(); } } @@ -160,7 +160,7 @@ public Object get(Object instance) throws ReflectiveOperationException { } @Override - 
public Class getType() { + public Class getType() { return method.getReturnType(); } } diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index c107af9ceb415..2ee17800c10e4 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -35,6 +35,12 @@ + + + org.scala-lang + scala-library + + io.netty @@ -87,13 +93,6 @@ - - - org.scala-lang - scala-library - ${scala.version} - test - log4j log4j diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index 53835d8304866..c9ef9f918ffd1 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -293,9 +293,8 @@ public void close() { } connectionPool.clear(); - if (workerGroup != null) { + if (workerGroup != null && !workerGroup.isShuttingDown()) { workerGroup.shutdownGracefully(); - workerGroup = null; } } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java index 736059fdd1f57..490915f6de4b3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/Encoders.java @@ -112,4 +112,27 @@ public static int[] decode(ByteBuf buf) { return ints; } } + + /** Long integer arrays are encoded with their length followed by long integers. */ + public static class LongArrays { + public static int encodedLength(long[] longs) { + return 4 + 8 * longs.length; + } + + public static void encode(ByteBuf buf, long[] longs) { + buf.writeInt(longs.length); + for (long i : longs) { + buf.writeLong(i); + } + } + + public static long[] decode(ByteBuf buf) { + int numLongs = buf.readInt(); + long[] longs = new long[numLongs]; + for (int i = 0; i < longs.length; i ++) { + longs[i] = buf.readLong(); + } + return longs; + } + } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java index 2aec4a33bbe43..9b76981c31c57 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/TransportClientFactorySuite.java @@ -217,4 +217,11 @@ public Iterable> getAll() { assertFalse(c1.isActive()); } } + + @Test(expected = IOException.class) + public void closeFactoryBeforeCreateClient() throws IOException, InterruptedException { + TransportClientFactory factory = context.createClientFactory(); + factory.close(); + factory.createClient(TestUtils.getLocalHost(), server1.getPort()); + } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 037e5cf7e5222..2d7a72315cf23 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -106,7 +106,7 @@ protected void handleMessage( numBlockIds += ids.length; } streamId = 
streamManager.registerStream(client.getClientId(), - new ManagedBufferIterator(msg, numBlockIds), client.getChannel()); + new ShuffleManagedBufferIterator(msg), client.getChannel()); } else { // For the compatibility with the old version, still keep the support for OpenBlocks. OpenBlocks msg = (OpenBlocks) msgObj; @@ -299,21 +299,6 @@ private int[] shuffleMapIdAndReduceIds(String[] blockIds, int shuffleId) { return mapIdAndReduceIds; } - ManagedBufferIterator(FetchShuffleBlocks msg, int numBlockIds) { - final int[] mapIdAndReduceIds = new int[2 * numBlockIds]; - int idx = 0; - for (int i = 0; i < msg.mapIds.length; i++) { - for (int reduceId : msg.reduceIds[i]) { - mapIdAndReduceIds[idx++] = msg.mapIds[i]; - mapIdAndReduceIds[idx++] = reduceId; - } - } - assert(idx == 2 * numBlockIds); - size = mapIdAndReduceIds.length; - blockDataForIndexFn = index -> blockManager.getBlockData(msg.appId, msg.execId, - msg.shuffleId, mapIdAndReduceIds[index], mapIdAndReduceIds[index + 1]); - } - @Override public boolean hasNext() { return index < size; @@ -328,6 +313,49 @@ public ManagedBuffer next() { } } + private class ShuffleManagedBufferIterator implements Iterator { + + private int mapIdx = 0; + private int reduceIdx = 0; + + private final String appId; + private final String execId; + private final int shuffleId; + private final long[] mapIds; + private final int[][] reduceIds; + + ShuffleManagedBufferIterator(FetchShuffleBlocks msg) { + appId = msg.appId; + execId = msg.execId; + shuffleId = msg.shuffleId; + mapIds = msg.mapIds; + reduceIds = msg.reduceIds; + } + + @Override + public boolean hasNext() { + // mapIds.length must equal to reduceIds.length, and the passed in FetchShuffleBlocks + // must have non-empty mapIds and reduceIds, see the checking logic in + // OneForOneBlockFetcher. + assert(mapIds.length != 0 && mapIds.length == reduceIds.length); + return mapIdx < mapIds.length && reduceIdx < reduceIds[mapIdx].length; + } + + @Override + public ManagedBuffer next() { + final ManagedBuffer block = blockManager.getBlockData( + appId, execId, shuffleId, mapIds[mapIdx], reduceIds[mapIdx][reduceIdx]); + if (reduceIdx < reduceIds[mapIdx].length - 1) { + reduceIdx += 1; + } else { + reduceIdx = 0; + mapIdx += 1; + } + metrics.blockTransferRateBytes.mark(block != null ? block.size() : 0); + return block; + } + } + @Override public void channelActive(TransportClient client) { metrics.activeConnections.inc(); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index 50f16fc700f12..8b0d1e145a813 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -172,7 +172,7 @@ public ManagedBuffer getBlockData( String appId, String execId, int shuffleId, - int mapId, + long mapId, int reduceId) { ExecutorShuffleInfo executor = executors.get(new AppExecId(appId, execId)); if (executor == null) { @@ -296,7 +296,7 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) { * and the block id format is from ShuffleDataBlockId and ShuffleIndexBlockId. 
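The ShuffleManagedBufferIterator introduced above serves the requested blocks map-by-map, exhausting every reduce id of one map id before advancing to the next. A minimal standalone sketch of that visit order, using made-up ids and plain loops rather than any Spark API, might look like:

    // Illustrative only: mirrors the mapIdx/reduceIdx bookkeeping of ShuffleManagedBufferIterator.
    public class ShuffleIterationOrderSketch {
      public static void main(String[] args) {
        long[] mapIds = {7L, 9L};            // hypothetical map ids from a FetchShuffleBlocks request
        int[][] reduceIds = {{0, 1}, {3}};   // reduce ids requested for each map id
        for (int mapIdx = 0; mapIdx < mapIds.length; mapIdx++) {
          for (int reduceIdx = 0; reduceIdx < reduceIds[mapIdx].length; reduceIdx++) {
            // One ManagedBuffer would be returned per step: shuffle_<shuffleId>_<mapId>_<reduceId>
            System.out.println("shuffle_0_" + mapIds[mapIdx] + "_" + reduceIds[mapIdx][reduceIdx]);
          }
        }
      }
    }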
*/ private ManagedBuffer getSortBasedShuffleBlockData( - ExecutorShuffleInfo executor, int shuffleId, int mapId, int reduceId) { + ExecutorShuffleInfo executor, int shuffleId, long mapId, int reduceId) { File indexFile = ExecutorDiskUtils.getFile(executor.localDirs, executor.subDirsPerLocalDir, "shuffle_" + shuffleId + "_" + mapId + "_0.index"); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index cc11e92067375..52854c86be3e6 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -24,6 +24,8 @@ import java.util.HashMap; import com.google.common.primitives.Ints; +import com.google.common.primitives.Longs; +import org.apache.commons.lang3.tuple.ImmutableTriple; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -111,21 +113,21 @@ private boolean isShuffleBlocks(String[] blockIds) { */ private FetchShuffleBlocks createFetchShuffleBlocksMsg( String appId, String execId, String[] blockIds) { - int shuffleId = splitBlockId(blockIds[0])[0]; - HashMap> mapIdToReduceIds = new HashMap<>(); + int shuffleId = splitBlockId(blockIds[0]).left; + HashMap> mapIdToReduceIds = new HashMap<>(); for (String blockId : blockIds) { - int[] blockIdParts = splitBlockId(blockId); - if (blockIdParts[0] != shuffleId) { + ImmutableTriple blockIdParts = splitBlockId(blockId); + if (blockIdParts.left != shuffleId) { throw new IllegalArgumentException("Expected shuffleId=" + shuffleId + ", got:" + blockId); } - int mapId = blockIdParts[1]; + long mapId = blockIdParts.middle; if (!mapIdToReduceIds.containsKey(mapId)) { mapIdToReduceIds.put(mapId, new ArrayList<>()); } - mapIdToReduceIds.get(mapId).add(blockIdParts[2]); + mapIdToReduceIds.get(mapId).add(blockIdParts.right); } - int[] mapIds = Ints.toArray(mapIdToReduceIds.keySet()); + long[] mapIds = Longs.toArray(mapIdToReduceIds.keySet()); int[][] reduceIdArr = new int[mapIds.length][]; for (int i = 0; i < mapIds.length; i++) { reduceIdArr[i] = Ints.toArray(mapIdToReduceIds.get(mapIds[i])); @@ -134,17 +136,16 @@ private FetchShuffleBlocks createFetchShuffleBlocksMsg( } /** Split the shuffleBlockId and return shuffleId, mapId and reduceId. */ - private int[] splitBlockId(String blockId) { + private ImmutableTriple splitBlockId(String blockId) { String[] blockIdParts = blockId.split("_"); if (blockIdParts.length != 4 || !blockIdParts[0].equals("shuffle")) { throw new IllegalArgumentException( "Unexpected shuffle block id format: " + blockId); } - return new int[] { - Integer.parseInt(blockIdParts[1]), - Integer.parseInt(blockIdParts[2]), - Integer.parseInt(blockIdParts[3]) - }; + return new ImmutableTriple<>( + Integer.parseInt(blockIdParts[1]), + Long.parseLong(blockIdParts[2]), + Integer.parseInt(blockIdParts[3])); } /** Callback invoked on receipt of each chunk. We equate a single chunk to a single block. 
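Because map ids can now exceed the int range, the reworked client-side splitBlockId above parses the middle component of a "shuffle_<shuffleId>_<mapId>_<reduceId>" block id as a long and returns a commons-lang3 ImmutableTriple. A hedged, standalone illustration of that parsing (the class name and sample block id are made up for the sketch):

    import org.apache.commons.lang3.tuple.ImmutableTriple;

    public class SplitBlockIdSketch {
      // Mirrors the new parsing: shuffleId is an int, mapId a long, reduceId an int.
      static ImmutableTriple<Integer, Long, Integer> split(String blockId) {
        String[] parts = blockId.split("_");
        if (parts.length != 4 || !parts[0].equals("shuffle")) {
          throw new IllegalArgumentException("Unexpected shuffle block id format: " + blockId);
        }
        return new ImmutableTriple<>(
          Integer.parseInt(parts[1]), Long.parseLong(parts[2]), Integer.parseInt(parts[3]));
      }

      public static void main(String[] args) {
        // A map id larger than Integer.MAX_VALUE would not fit the old int[]-based parsing.
        ImmutableTriple<Integer, Long, Integer> ids = split("shuffle_3_5000000000_7");
        System.out.println(ids.left + " / " + ids.middle + " / " + ids.right);
      }
    }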
*/ diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java index 466eeb3e048a8..faa960d414bcc 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java @@ -34,14 +34,14 @@ public class FetchShuffleBlocks extends BlockTransferMessage { public final int shuffleId; // The length of mapIds must equal to reduceIds.size(), for the i-th mapId in mapIds, // it corresponds to the i-th int[] in reduceIds, which contains all reduce id for this map id. - public final int[] mapIds; + public final long[] mapIds; public final int[][] reduceIds; public FetchShuffleBlocks( String appId, String execId, int shuffleId, - int[] mapIds, + long[] mapIds, int[][] reduceIds) { this.appId = appId; this.execId = execId; @@ -98,7 +98,7 @@ public int encodedLength() { return Encoders.Strings.encodedLength(appId) + Encoders.Strings.encodedLength(execId) + 4 /* encoded length of shuffleId */ - + Encoders.IntArrays.encodedLength(mapIds) + + Encoders.LongArrays.encodedLength(mapIds) + 4 /* encoded length of reduceIds.size() */ + encodedLengthOfReduceIds; } @@ -108,7 +108,7 @@ public void encode(ByteBuf buf) { Encoders.Strings.encode(buf, appId); Encoders.Strings.encode(buf, execId); buf.writeInt(shuffleId); - Encoders.IntArrays.encode(buf, mapIds); + Encoders.LongArrays.encode(buf, mapIds); buf.writeInt(reduceIds.length); for (int[] ids: reduceIds) { Encoders.IntArrays.encode(buf, ids); @@ -119,7 +119,7 @@ public static FetchShuffleBlocks decode(ByteBuf buf) { String appId = Encoders.Strings.decode(buf); String execId = Encoders.Strings.decode(buf); int shuffleId = buf.readInt(); - int[] mapIds = Encoders.IntArrays.decode(buf); + long[] mapIds = Encoders.LongArrays.decode(buf); int reduceIdsSize = buf.readInt(); int[][] reduceIds = new int[reduceIdsSize][]; for (int i = 0; i < reduceIdsSize; i++) { diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java index 649c471dc1679..ba40f4a45ac8f 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/BlockTransferMessagesSuite.java @@ -29,7 +29,7 @@ public class BlockTransferMessagesSuite { public void serializeOpenShuffleBlocks() { checkSerializeDeserialize(new OpenBlocks("app-1", "exec-2", new String[] { "b1", "b2" })); checkSerializeDeserialize(new FetchShuffleBlocks( - "app-1", "exec-2", 0, new int[] {0, 1}, + "app-1", "exec-2", 0, new long[] {0, 1}, new int[][] {{ 0, 1 }, { 0, 1, 2 }})); checkSerializeDeserialize(new RegisterExecutor("app-1", "exec-2", new ExecutorShuffleInfo( new String[] { "/local1", "/local2" }, 32, "MyShuffleManager"))); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index 9c623a70424b6..6a5d04b6f417b 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ 
b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -101,7 +101,7 @@ public void testFetchShuffleBlocks() { when(blockResolver.getBlockData("app0", "exec1", 0, 0, 1)).thenReturn(blockMarkers[1]); FetchShuffleBlocks fetchShuffleBlocks = new FetchShuffleBlocks( - "app0", "exec1", 0, new int[] { 0 }, new int[][] {{ 0, 1 }}); + "app0", "exec1", 0, new long[] { 0 }, new int[][] {{ 0, 1 }}); checkOpenBlocksReceive(fetchShuffleBlocks, blockMarkers); verify(blockResolver, times(1)).getBlockData("app0", "exec1", 0, 0, 0); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index 66633cc7a3595..26a11672b8068 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -64,7 +64,7 @@ public void testFetchOne() { BlockFetchingListener listener = fetchBlocks( blocks, blockIds, - new FetchShuffleBlocks("app-id", "exec-id", 0, new int[] { 0 }, new int[][] {{ 0 }}), + new FetchShuffleBlocks("app-id", "exec-id", 0, new long[] { 0 }, new int[][] {{ 0 }}), conf); verify(listener).onBlockFetchSuccess("shuffle_0_0_0", blocks.get("shuffle_0_0_0")); @@ -100,7 +100,7 @@ public void testFetchThreeShuffleBlocks() { BlockFetchingListener listener = fetchBlocks( blocks, blockIds, - new FetchShuffleBlocks("app-id", "exec-id", 0, new int[] { 0 }, new int[][] {{ 0, 1, 2 }}), + new FetchShuffleBlocks("app-id", "exec-id", 0, new long[] { 0 }, new int[][] {{ 0, 1, 2 }}), conf); for (int i = 0; i < 3; i ++) { diff --git a/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java b/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java new file mode 100644 index 0000000000000..1c0fff1b4045d --- /dev/null +++ b/common/tags/src/test/java/org/apache/spark/tags/ExtendedSQLTest.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
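On the wire, the long map ids in FetchShuffleBlocks are handled by the Encoders.LongArrays helper added earlier in this patch (length prefix followed by the longs). A small round-trip sketch against a Netty ByteBuf, assuming a spark-network-common build that includes this patch is on the classpath (class name and sample ids are illustrative):

    import io.netty.buffer.ByteBuf;
    import io.netty.buffer.Unpooled;
    import org.apache.spark.network.protocol.Encoders;

    public class LongArraysRoundTrip {
      public static void main(String[] args) {
        long[] mapIds = {0L, 1L, 5000000000L};  // map ids may now exceed the int range
        // encodedLength = 4 bytes for the count + 8 bytes per long
        ByteBuf buf = Unpooled.buffer(Encoders.LongArrays.encodedLength(mapIds));
        Encoders.LongArrays.encode(buf, mapIds);     // writes the length, then each long
        long[] decoded = Encoders.LongArrays.decode(buf);
        assert decoded.length == 3 && decoded[2] == 5000000000L;
      }
    }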
+ */ + +package org.apache.spark.tags; + +import org.scalatest.TagAnnotation; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface ExtendedSQLTest { } diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala index fdb81a06d41c9..72aa682bb95bc 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/UTF8StringPropertyCheckSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.unsafe.types import org.apache.commons.text.similarity.LevenshteinDistance import org.scalacheck.{Arbitrary, Gen} -import org.scalatest.prop.GeneratorDrivenPropertyChecks +import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks // scalastyle:off import org.scalatest.{FunSuite, Matchers} @@ -28,7 +28,7 @@ import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} /** * This TestSuite utilize ScalaCheck to generate randomized inputs for UTF8String testing. */ -class UTF8StringPropertyCheckSuite extends FunSuite with GeneratorDrivenPropertyChecks with Matchers { +class UTF8StringPropertyCheckSuite extends FunSuite with ScalaCheckDrivenPropertyChecks with Matchers { // scalastyle:on test("toString") { diff --git a/conf/metrics.properties.template b/conf/metrics.properties.template index da0b06d295252..f52d33fd64223 100644 --- a/conf/metrics.properties.template +++ b/conf/metrics.properties.template @@ -113,6 +113,15 @@ # /metrics/applications/json # App information # /metrics/master/json # Master information +# org.apache.spark.metrics.sink.PrometheusServlet +# Name: Default: Description: +# path VARIES* Path prefix from the web server root +# +# * Default path is /metrics/prometheus for all instances except the master. 
The +# master has two paths: +# /metrics/applications/prometheus # App information +# /metrics/master/prometheus # Master information + # org.apache.spark.metrics.sink.GraphiteSink # Name: Default: Description: # host NONE Hostname of the Graphite server, must be set @@ -192,4 +201,10 @@ #driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource -#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource \ No newline at end of file +#executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource + +# Example configuration for PrometheusServlet +#*.sink.prometheusServlet.class=org.apache.spark.metrics.sink.PrometheusServlet +#*.sink.prometheusServlet.path=/metrics/prometheus +#master.sink.prometheusServlet.path=/metrics/master/prometheus +#applications.sink.prometheusServlet.path=/metrics/applications/prometheus diff --git a/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt b/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..e944111ff9e93 --- /dev/null +++ b/core/benchmarks/CoalescedRDDBenchmark-jdk11-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Coalesced RDD , large scale +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Coalesce Num Partitions: 100 Num Hosts: 1 344 360 14 0.3 3441.4 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 283 301 22 0.4 2825.1 1.2X +Coalesce Num Partitions: 100 Num Hosts: 10 270 271 2 0.4 2700.5 1.3X +Coalesce Num Partitions: 100 Num Hosts: 20 272 273 1 0.4 2721.1 1.3X +Coalesce Num Partitions: 100 Num Hosts: 40 271 272 1 0.4 2710.0 1.3X +Coalesce Num Partitions: 100 Num Hosts: 80 266 267 2 0.4 2656.3 1.3X +Coalesce Num Partitions: 500 Num Hosts: 1 609 619 15 0.2 6089.0 0.6X +Coalesce Num Partitions: 500 Num Hosts: 5 338 343 6 0.3 3383.0 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 303 306 3 0.3 3029.4 1.1X +Coalesce Num Partitions: 500 Num Hosts: 20 286 288 2 0.4 2855.9 1.2X +Coalesce Num Partitions: 500 Num Hosts: 40 279 282 4 0.4 2793.3 1.2X +Coalesce Num Partitions: 500 Num Hosts: 80 273 275 3 0.4 2725.9 1.3X +Coalesce Num Partitions: 1000 Num Hosts: 1 951 955 4 0.1 9514.1 0.4X +Coalesce Num Partitions: 1000 Num Hosts: 5 421 429 8 0.2 4211.3 0.8X +Coalesce Num Partitions: 1000 Num Hosts: 10 347 352 4 0.3 3473.5 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 309 312 5 0.3 3087.5 1.1X +Coalesce Num Partitions: 1000 Num Hosts: 40 290 294 6 0.3 2896.4 1.2X +Coalesce Num Partitions: 1000 Num Hosts: 80 281 286 5 0.4 2811.3 1.2X +Coalesce Num Partitions: 5000 Num Hosts: 1 3928 3950 27 0.0 39278.0 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 1373 1389 27 0.1 13725.2 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 10 812 827 13 0.1 8123.3 0.4X +Coalesce Num Partitions: 5000 Num Hosts: 20 530 540 9 0.2 5299.1 0.6X +Coalesce Num Partitions: 5000 Num Hosts: 40 421 425 5 0.2 4210.5 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 335 344 12 0.3 3353.7 1.0X +Coalesce Num Partitions: 10000 Num Hosts: 1 7116 7120 4 0.0 71159.0 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 2539 2598 51 0.0 25390.1 0.1X +Coalesce Num Partitions: 10000 Num 
Hosts: 10 1393 1432 34 0.1 13928.1 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 20 833 1009 303 0.1 8329.2 0.4X +Coalesce Num Partitions: 10000 Num Hosts: 40 562 563 3 0.2 5615.2 0.6X +Coalesce Num Partitions: 10000 Num Hosts: 80 420 426 7 0.2 4204.0 0.8X + + diff --git a/core/benchmarks/CoalescedRDDBenchmark-results.txt b/core/benchmarks/CoalescedRDDBenchmark-results.txt index dd63b0adea4f2..f1b867951a074 100644 --- a/core/benchmarks/CoalescedRDDBenchmark-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_201-b09 on Windows 10 10.0 -Intel64 Family 6 Model 63 Stepping 2, GenuineIntel +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Coalesce Num Partitions: 100 Num Hosts: 1 346 364 24 0.3 3458.9 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 258 264 6 0.4 2579.0 1.3X -Coalesce Num Partitions: 100 Num Hosts: 10 242 249 7 0.4 2415.2 1.4X -Coalesce Num Partitions: 100 Num Hosts: 20 237 242 7 0.4 2371.7 1.5X -Coalesce Num Partitions: 100 Num Hosts: 40 230 231 1 0.4 2299.8 1.5X -Coalesce Num Partitions: 100 Num Hosts: 80 222 233 14 0.4 2223.0 1.6X -Coalesce Num Partitions: 500 Num Hosts: 1 659 665 5 0.2 6590.4 0.5X -Coalesce Num Partitions: 500 Num Hosts: 5 340 381 47 0.3 3395.2 1.0X -Coalesce Num Partitions: 500 Num Hosts: 10 279 307 47 0.4 2788.3 1.2X -Coalesce Num Partitions: 500 Num Hosts: 20 259 261 2 0.4 2591.9 1.3X -Coalesce Num Partitions: 500 Num Hosts: 40 241 250 15 0.4 2406.5 1.4X -Coalesce Num Partitions: 500 Num Hosts: 80 235 237 3 0.4 2349.9 1.5X -Coalesce Num Partitions: 1000 Num Hosts: 1 1050 1053 4 0.1 10503.2 0.3X -Coalesce Num Partitions: 1000 Num Hosts: 5 405 407 2 0.2 4049.5 0.9X -Coalesce Num Partitions: 1000 Num Hosts: 10 320 322 2 0.3 3202.7 1.1X -Coalesce Num Partitions: 1000 Num Hosts: 20 276 277 0 0.4 2762.3 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 40 257 260 5 0.4 2571.2 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 80 245 252 13 0.4 2448.9 1.4X -Coalesce Num Partitions: 5000 Num Hosts: 1 3099 3145 55 0.0 30988.6 0.1X -Coalesce Num Partitions: 5000 Num Hosts: 5 1037 1050 20 0.1 10374.4 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 10 626 633 8 0.2 6261.8 0.6X -Coalesce Num Partitions: 5000 Num Hosts: 20 426 431 5 0.2 4258.6 0.8X -Coalesce Num Partitions: 5000 Num Hosts: 40 328 341 22 0.3 3275.4 1.1X -Coalesce Num Partitions: 5000 Num Hosts: 80 272 275 4 0.4 2721.4 1.3X -Coalesce Num Partitions: 10000 Num Hosts: 1 5516 5526 9 0.0 55156.8 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 5 1956 1992 48 0.1 19560.9 0.2X -Coalesce Num Partitions: 10000 Num Hosts: 10 1045 1057 18 0.1 10447.4 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 20 637 658 24 0.2 6373.2 0.5X -Coalesce Num Partitions: 10000 Num Hosts: 40 431 448 15 0.2 4312.9 0.8X -Coalesce Num Partitions: 10000 Num Hosts: 80 326 328 2 0.3 3263.4 1.1X +Coalesce Num Partitions: 100 Num Hosts: 1 395 401 9 0.3 3952.3 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 296 344 42 0.3 2963.2 1.3X +Coalesce Num Partitions: 100 Num Hosts: 10 294 308 15 0.3 2941.7 1.3X +Coalesce Num Partitions: 100 Num Hosts: 20 316 328 13 0.3 3155.2 1.3X +Coalesce 
Num Partitions: 100 Num Hosts: 40 294 316 36 0.3 2940.3 1.3X +Coalesce Num Partitions: 100 Num Hosts: 80 292 324 30 0.3 2922.2 1.4X +Coalesce Num Partitions: 500 Num Hosts: 1 629 687 61 0.2 6292.4 0.6X +Coalesce Num Partitions: 500 Num Hosts: 5 354 378 42 0.3 3541.7 1.1X +Coalesce Num Partitions: 500 Num Hosts: 10 318 338 29 0.3 3179.8 1.2X +Coalesce Num Partitions: 500 Num Hosts: 20 306 317 11 0.3 3059.2 1.3X +Coalesce Num Partitions: 500 Num Hosts: 40 294 311 28 0.3 2941.6 1.3X +Coalesce Num Partitions: 500 Num Hosts: 80 288 309 34 0.3 2883.9 1.4X +Coalesce Num Partitions: 1000 Num Hosts: 1 956 978 20 0.1 9562.2 0.4X +Coalesce Num Partitions: 1000 Num Hosts: 5 431 452 36 0.2 4306.2 0.9X +Coalesce Num Partitions: 1000 Num Hosts: 10 358 379 23 0.3 3581.1 1.1X +Coalesce Num Partitions: 1000 Num Hosts: 20 324 347 20 0.3 3236.7 1.2X +Coalesce Num Partitions: 1000 Num Hosts: 40 312 333 20 0.3 3116.8 1.3X +Coalesce Num Partitions: 1000 Num Hosts: 80 307 342 32 0.3 3068.4 1.3X +Coalesce Num Partitions: 5000 Num Hosts: 1 3895 3906 12 0.0 38946.8 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 1388 1401 19 0.1 13881.7 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 10 806 839 57 0.1 8063.7 0.5X +Coalesce Num Partitions: 5000 Num Hosts: 20 546 573 44 0.2 5462.6 0.7X +Coalesce Num Partitions: 5000 Num Hosts: 40 413 418 5 0.2 4134.7 1.0X +Coalesce Num Partitions: 5000 Num Hosts: 80 345 365 23 0.3 3448.1 1.1X +Coalesce Num Partitions: 10000 Num Hosts: 1 6933 6966 55 0.0 69328.8 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 5 2455 2499 69 0.0 24551.7 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 10 1352 1392 34 0.1 13520.2 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 20 815 853 50 0.1 8147.5 0.5X +Coalesce Num Partitions: 10000 Num Hosts: 40 558 581 28 0.2 5578.0 0.7X +Coalesce Num Partitions: 10000 Num Hosts: 80 416 423 5 0.2 4163.3 0.9X diff --git a/core/benchmarks/KryoBenchmark-jdk11-results.txt b/core/benchmarks/KryoBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..27f0b8f59f47a --- /dev/null +++ b/core/benchmarks/KryoBenchmark-jdk11-results.txt @@ -0,0 +1,28 @@ +================================================================================================ +Benchmark Kryo Unsafe vs safe Serialization +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +basicTypes: Int with unsafe:true 275 288 14 3.6 275.2 1.0X +basicTypes: Long with unsafe:true 331 336 13 3.0 330.9 0.8X +basicTypes: Float with unsafe:true 304 305 1 3.3 304.4 0.9X +basicTypes: Double with unsafe:true 328 332 3 3.0 328.1 0.8X +Array: Int with unsafe:true 4 4 0 252.8 4.0 69.6X +Array: Long with unsafe:true 6 6 0 161.5 6.2 44.5X +Array: Float with unsafe:true 4 4 0 264.6 3.8 72.8X +Array: Double with unsafe:true 6 7 0 160.5 6.2 44.2X +Map of string->Double with unsafe:true 52 52 0 19.3 51.8 5.3X +basicTypes: Int with unsafe:false 344 345 1 2.9 344.3 0.8X +basicTypes: Long with unsafe:false 372 373 1 2.7 372.3 0.7X +basicTypes: Float with unsafe:false 333 334 1 3.0 333.4 0.8X +basicTypes: Double with unsafe:false 344 345 0 2.9 344.3 0.8X +Array: Int with unsafe:false 25 25 0 40.8 24.5 11.2X +Array: Long with 
unsafe:false 37 37 1 27.3 36.7 7.5X +Array: Float with unsafe:false 11 11 0 92.1 10.9 25.4X +Array: Double with unsafe:false 17 18 0 58.3 17.2 16.0X +Map of string->Double with unsafe:false 51 52 1 19.4 51.5 5.3X + + diff --git a/core/benchmarks/KryoBenchmark-results.txt b/core/benchmarks/KryoBenchmark-results.txt index 91e22f3afc14f..49791e6e87e3a 100644 --- a/core/benchmarks/KryoBenchmark-results.txt +++ b/core/benchmarks/KryoBenchmark-results.txt @@ -2,28 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz - -Benchmark Kryo Unsafe vs safe Serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 138 / 149 7.2 138.0 1.0X -basicTypes: Long with unsafe:true 168 / 173 6.0 167.7 0.8X -basicTypes: Float with unsafe:true 153 / 174 6.5 153.1 0.9X -basicTypes: Double with unsafe:true 161 / 185 6.2 161.1 0.9X -Array: Int with unsafe:true 2 / 3 409.7 2.4 56.5X -Array: Long with unsafe:true 4 / 5 232.5 4.3 32.1X -Array: Float with unsafe:true 3 / 4 367.3 2.7 50.7X -Array: Double with unsafe:true 4 / 5 228.5 4.4 31.5X -Map of string->Double with unsafe:true 38 / 45 26.5 37.8 3.7X -basicTypes: Int with unsafe:false 176 / 187 5.7 175.9 0.8X -basicTypes: Long with unsafe:false 191 / 203 5.2 191.2 0.7X -basicTypes: Float with unsafe:false 166 / 176 6.0 166.2 0.8X -basicTypes: Double with unsafe:false 174 / 190 5.7 174.3 0.8X -Array: Int with unsafe:false 19 / 26 52.9 18.9 7.3X -Array: Long with unsafe:false 27 / 31 37.7 26.5 5.2X -Array: Float with unsafe:false 8 / 10 124.3 8.0 17.2X -Array: Double with unsafe:false 12 / 13 83.6 12.0 11.5X -Map of string->Double with unsafe:false 38 / 42 26.1 38.3 3.6X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +basicTypes: Int with unsafe:true 269 290 23 3.7 269.0 1.0X +basicTypes: Long with unsafe:true 294 295 1 3.4 293.8 0.9X +basicTypes: Float with unsafe:true 300 301 1 3.3 300.4 0.9X +basicTypes: Double with unsafe:true 304 305 1 3.3 304.0 0.9X +Array: Int with unsafe:true 5 6 1 193.5 5.2 52.0X +Array: Long with unsafe:true 8 9 1 131.2 7.6 35.3X +Array: Float with unsafe:true 6 6 0 163.5 6.1 44.0X +Array: Double with unsafe:true 9 10 0 108.8 9.2 29.3X +Map of string->Double with unsafe:true 54 54 1 18.7 53.6 5.0X +basicTypes: Int with unsafe:false 326 327 1 3.1 326.2 0.8X +basicTypes: Long with unsafe:false 353 354 1 2.8 353.3 0.8X +basicTypes: Float with unsafe:false 325 327 1 3.1 325.1 0.8X +basicTypes: Double with unsafe:false 335 336 1 3.0 335.0 0.8X +Array: Int with unsafe:false 27 28 1 36.7 27.2 9.9X +Array: Long with unsafe:false 40 41 1 25.0 40.0 6.7X +Array: Float with unsafe:false 12 13 1 80.8 12.4 21.7X +Array: Double with unsafe:false 21 21 1 48.6 20.6 13.1X +Map of string->Double with unsafe:false 56 57 1 17.8 56.1 4.8X diff --git a/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt b/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..6b148bde12d36 
--- /dev/null +++ b/core/benchmarks/KryoSerializerBenchmark-jdk11-results.txt @@ -0,0 +1,12 @@ +================================================================================================ +Benchmark KryoPool vs old"pool of 1" implementation +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +KryoPool:true 6208 8374 NaN 0.0 12416876.6 1.0X +KryoPool:false 9084 11577 724 0.0 18168947.4 0.7X + + diff --git a/core/benchmarks/KryoSerializerBenchmark-results.txt b/core/benchmarks/KryoSerializerBenchmark-results.txt index c3ce336d93241..609f3298cbc00 100644 --- a/core/benchmarks/KryoSerializerBenchmark-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-results.txt @@ -1,12 +1,12 @@ ================================================================================================ -Benchmark KryoPool vs "pool of 1" +Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.14 -Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz -Benchmark KryoPool vs "pool of 1": Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -KryoPool:true 2682 / 3425 0.0 5364627.9 1.0X -KryoPool:false 8176 / 9292 0.0 16351252.2 0.3X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +KryoPool:true 6012 7586 NaN 0.0 12023020.2 1.0X +KryoPool:false 9289 11566 909 0.0 18578683.1 0.6X diff --git a/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt b/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..605b856d53382 --- /dev/null +++ b/core/benchmarks/PropertiesCloneBenchmark-jdk11-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Properties Cloning +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.1 11539.0 1.0X +Utils.cloneProperties 0 0 0 1.7 572.0 20.2X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.0 217514.0 1.0X 
+Utils.cloneProperties 0 0 0 0.2 5387.0 40.4X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 1 1 0 0.0 634574.0 1.0X +Utils.cloneProperties 0 0 0 0.3 3082.0 205.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 3 3 0 0.0 2576565.0 1.0X +Utils.cloneProperties 0 0 0 0.1 16071.0 160.3X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 5 5 0 0.0 5027248.0 1.0X +Utils.cloneProperties 0 0 0 0.0 31842.0 157.9X + + diff --git a/core/benchmarks/PropertiesCloneBenchmark-results.txt b/core/benchmarks/PropertiesCloneBenchmark-results.txt new file mode 100644 index 0000000000000..5d332a147c698 --- /dev/null +++ b/core/benchmarks/PropertiesCloneBenchmark-results.txt @@ -0,0 +1,40 @@ +================================================================================================ +Properties Cloning +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.1 13640.0 1.0X +Utils.cloneProperties 0 0 0 1.6 608.0 22.4X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 0 0 0 0.0 238968.0 1.0X +Utils.cloneProperties 0 0 0 0.4 2318.0 103.1X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 1 1 0 0.0 725849.0 1.0X +Utils.cloneProperties 0 0 0 0.3 2900.0 250.3X + +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 3 3 0 0.0 2999676.0 1.0X +Utils.cloneProperties 0 0 0 0.1 11734.0 255.6X + +OpenJDK 64-Bit Server VM 
1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +SerializationUtils.clone 6 6 1 0.0 5846410.0 1.0X +Utils.cloneProperties 0 0 0 0.0 22405.0 260.9X + + diff --git a/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt b/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..9aa10e4835a2f --- /dev/null +++ b/core/benchmarks/XORShiftRandomBenchmark-jdk11-results.txt @@ -0,0 +1,44 @@ +================================================================================================ +Pseudo random +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 1362 1362 0 73.4 13.6 1.0X +XORShiftRandom 227 227 0 440.6 2.3 6.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2725 2726 1 36.7 27.3 1.0X +XORShiftRandom 694 694 1 144.1 6.9 3.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2727 2728 0 36.7 27.3 1.0X +XORShiftRandom 693 694 0 144.2 6.9 3.9X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 7012 7016 4 14.3 70.1 1.0X +XORShiftRandom 6065 6067 1 16.5 60.7 1.2X + + +================================================================================================ +hash seed +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +XORShiftRandom.hashSeed 36 37 1 276.5 3.6 1.0X + + diff --git a/core/benchmarks/XORShiftRandomBenchmark-results.txt b/core/benchmarks/XORShiftRandomBenchmark-results.txt index 1140489e4a7f3..4b069878b2e9b 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 
1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextInt: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 1362 / 1362 73.4 13.6 1.0X -XORShiftRandom 227 / 227 440.6 2.3 6.0X +nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 1362 1396 59 73.4 13.6 1.0X +XORShiftRandom 227 227 0 440.7 2.3 6.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextLong: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 2732 / 2732 36.6 27.3 1.0X -XORShiftRandom 629 / 629 159.0 6.3 4.3X +nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2732 2732 1 36.6 27.3 1.0X +XORShiftRandom 630 630 1 158.7 6.3 4.3X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextDouble: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 2730 / 2730 36.6 27.3 1.0X -XORShiftRandom 629 / 629 159.0 6.3 4.3X +nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 2731 2732 1 36.6 27.3 1.0X +XORShiftRandom 630 630 0 158.8 6.3 4.3X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -nextGaussian: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -java.util.Random 10288 / 10288 9.7 102.9 1.0X -XORShiftRandom 6351 / 6351 15.7 63.5 1.6X +nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +java.util.Random 8895 8899 4 11.2 88.9 1.0X +XORShiftRandom 5049 5052 5 19.8 50.5 1.8X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Hash seed: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -XORShiftRandom.hashSeed 1193 / 1195 8.4 119.3 1.0X +Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per 
Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +XORShiftRandom.hashSeed 67 68 1 148.8 6.7 1.0X diff --git a/core/pom.xml b/core/pom.xml index 42fc2c4b3a287..38eb8adac500e 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -384,6 +384,11 @@ curator-test test + + org.apache.hadoop + hadoop-minikdc + test + net.razorvine pyrolite @@ -551,6 +556,15 @@ + + scala-2.13 + + + org.scala-lang.modules + scala-parallel-collections_${scala.binary.version} + + + diff --git a/core/src/main/java/org/apache/spark/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/ExecutorPlugin.java index f86520c81df33..b25c46266247e 100644 --- a/core/src/main/java/org/apache/spark/ExecutorPlugin.java +++ b/core/src/main/java/org/apache/spark/ExecutorPlugin.java @@ -40,12 +40,15 @@ public interface ExecutorPlugin { * Initialize the executor plugin. * *

Each executor will, during its initialization, invoke this method on each - * plugin provided in the spark.executor.plugins configuration.

+ * plugin provided in the spark.executor.plugins configuration. The Spark executor + * will wait on the completion of the execution of the init method.

* *

Plugins should create threads in their implementation of this method for * any polling, blocking, or intensive computation.

+ * + * @param pluginContext Context information for the executor where the plugin is running. */ - default void init() {} + default void init(ExecutorPluginContext pluginContext) {} /** * Clean up and terminate this plugin. diff --git a/core/src/main/java/org/apache/spark/ExecutorPluginContext.java b/core/src/main/java/org/apache/spark/ExecutorPluginContext.java new file mode 100644 index 0000000000000..8f018732b8217 --- /dev/null +++ b/core/src/main/java/org/apache/spark/ExecutorPluginContext.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark; + +import com.codahale.metrics.MetricRegistry; +import org.apache.spark.annotation.DeveloperApi; +import org.apache.spark.annotation.Private; + +/** + * Encapsulates information about the executor when initializing {@link ExecutorPlugin} instances. + */ +@DeveloperApi +public class ExecutorPluginContext { + + public final MetricRegistry metricRegistry; + public final SparkConf sparkConf; + public final String executorId; + public final String executorHostName; + public final boolean isLocal; + + @Private + public ExecutorPluginContext( + MetricRegistry registry, + SparkConf conf, + String id, + String hostName, + boolean local) { + metricRegistry = registry; + sparkConf = conf; + executorId = id; + executorHostName = hostName; + isLocal = local; + } + +} diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java index 92bf0ecc1b5cb..a1e29a8c873da 100644 --- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java +++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java @@ -51,7 +51,6 @@ public NioBufferedFileInputStream(File file) throws IOException { /** * Checks weather data is left to be read from the input stream. * @return true if data is left, false otherwise - * @throws IOException */ private boolean refill() throws IOException { if (!byteBuffer.hasRemaining()) { diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java index 4bfd2d358f36f..9a9d0c7946549 100644 --- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java +++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java @@ -54,7 +54,7 @@ public MemoryMode getMode() { /** * Returns the size of used memory in bytes. 
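The new ExecutorPluginContext above exposes the executor's MetricRegistry, SparkConf, executor id, host name and local-mode flag to every plugin. As a rough sketch of how a plugin might use it (MyExecutorPlugin, the gauge name and the polling thread are illustrative assumptions; only ExecutorPlugin, ExecutorPluginContext and the init(ExecutorPluginContext) signature come from this patch):

    import com.codahale.metrics.Gauge;

    import org.apache.spark.ExecutorPlugin;
    import org.apache.spark.ExecutorPluginContext;

    // Hypothetical plugin, enabled through spark.executor.plugins as before.
    public class MyExecutorPlugin implements ExecutorPlugin {

      @Override
      public void init(ExecutorPluginContext pluginContext) {
        // The executor blocks until init() returns, so register metrics here and
        // push any polling or heavy work onto a background thread.
        pluginContext.metricRegistry.register(
            "myPlugin.isLocalMode", (Gauge<Boolean>) () -> pluginContext.isLocal);

        Thread poller = new Thread(() -> {
          // periodic work would go here
        }, "my-executor-plugin-poller");
        poller.setDaemon(true);
        poller.start();
      }
    }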
*/ - protected long getUsed() { + public long getUsed() { return used; } @@ -78,7 +78,6 @@ public void spill() throws IOException { * @param size the amount of memory should be released * @param trigger the MemoryConsumer that trigger this spilling * @return the amount of released memory in bytes - * @throws IOException */ public abstract long spill(long size, MemoryConsumer trigger) throws IOException; diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java index 70c112b78911d..d30f3dad3c940 100644 --- a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleExecutorComponents.java @@ -18,6 +18,7 @@ package org.apache.spark.shuffle.api; import java.io.IOException; +import java.util.Optional; import org.apache.spark.annotation.Private; @@ -39,17 +40,31 @@ public interface ShuffleExecutorComponents { /** * Called once per map task to create a writer that will be responsible for persisting all the * partitioned bytes written by that map task. - * @param shuffleId Unique identifier for the shuffle the map task is a part of - * @param mapId Within the shuffle, the identifier of the map task - * @param mapTaskAttemptId Identifier of the task attempt. Multiple attempts of the same map task - * with the same (shuffleId, mapId) pair can be distinguished by the - * different values of mapTaskAttemptId. + * + * @param shuffleId Unique identifier for the shuffle the map task is a part of + * @param mapId An ID of the map task. The ID is unique within this Spark application. * @param numPartitions The number of partitions that will be written by the map task. Some of -* these partitions may be empty. + * these partitions may be empty. */ ShuffleMapOutputWriter createMapOutputWriter( int shuffleId, - int mapId, - long mapTaskAttemptId, + long mapId, int numPartitions) throws IOException; + + /** + * An optional extension for creating a map output writer that can optimize the transfer of a + * single partition file, as the entire result of a map task, to the backing store. + *

+ * Most implementations should return the default {@link Optional#empty()} to indicate that + * they do not support this optimization. This primarily is for backwards-compatibility in + * preserving an optimization in the local disk shuffle storage implementation. + * + * @param shuffleId Unique identifier for the shuffle the map task is a part of + * @param mapId An ID of the map task. The ID is unique within this Spark application. + */ + default Optional createSingleFileMapOutputWriter( + int shuffleId, + long mapId) throws IOException { + return Optional.empty(); + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java index 7fac00b7fbc3f..21abe9a57cd25 100644 --- a/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/api/ShuffleMapOutputWriter.java @@ -39,7 +39,7 @@ public interface ShuffleMapOutputWriter { * for the same partition within any given map task. The partition identifier will be in the * range of precisely 0 (inclusive) to numPartitions (exclusive), where numPartitions was * provided upon the creation of this map output writer via - * {@link ShuffleExecutorComponents#createMapOutputWriter(int, int, long, int)}. + * {@link ShuffleExecutorComponents#createMapOutputWriter(int, long, int)}. *

* Calls to this method will be invoked with monotonically increasing reducePartitionIds; each * call to this method will be called with a reducePartitionId that is strictly greater than diff --git a/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java new file mode 100644 index 0000000000000..cad8dcfda52bc --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/api/SingleSpillShuffleMapOutputWriter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.api; + +import java.io.File; +import java.io.IOException; + +import org.apache.spark.annotation.Private; + +/** + * Optional extension for partition writing that is optimized for transferring a single + * file to the backing store. + */ +@Private +public interface SingleSpillShuffleMapOutputWriter { + + /** + * Transfer a file that contains the bytes of all the partitions written by this map task. 
+ */ + void transferMapSpillFile(File mapOutputFile, long[] partitionLengths) throws IOException; +} diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index f75e932860f90..dc157eaa3b253 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -85,8 +85,7 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { private final Partitioner partitioner; private final ShuffleWriteMetricsReporter writeMetrics; private final int shuffleId; - private final int mapId; - private final long mapTaskAttemptId; + private final long mapId; private final Serializer serializer; private final ShuffleExecutorComponents shuffleExecutorComponents; @@ -106,8 +105,7 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { BypassMergeSortShuffleWriter( BlockManager blockManager, BypassMergeSortShuffleHandle handle, - int mapId, - long mapTaskAttemptId, + long mapId, SparkConf conf, ShuffleWriteMetricsReporter writeMetrics, ShuffleExecutorComponents shuffleExecutorComponents) { @@ -117,7 +115,6 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { this.blockManager = blockManager; final ShuffleDependency dep = handle.dependency(); this.mapId = mapId; - this.mapTaskAttemptId = mapTaskAttemptId; this.shuffleId = dep.shuffleId(); this.partitioner = dep.partitioner(); this.numPartitions = partitioner.numPartitions(); @@ -130,11 +127,12 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter { public void write(Iterator> records) throws IOException { assert (partitionWriters == null); ShuffleMapOutputWriter mapOutputWriter = shuffleExecutorComponents - .createMapOutputWriter(shuffleId, mapId, mapTaskAttemptId, numPartitions); + .createMapOutputWriter(shuffleId, mapId, numPartitions); try { if (!records.hasNext()) { partitionLengths = mapOutputWriter.commitAllPartitions(); - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); return; } final SerializerInstance serInstance = serializer.newInstance(); @@ -167,7 +165,8 @@ public void write(Iterator> records) throws IOException { } partitionLengths = writePartitionedData(mapOutputWriter); - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); } catch (Exception e) { try { mapOutputWriter.abort(e); diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index 024756087bf7f..833744f4777ce 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -423,7 +423,6 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p * * @return metadata for the spill files written by this sorter. If no records were ever inserted * into this sorter, then this will return an empty array. 
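The LocalDiskSingleSpillMapOutputWriter used further down is not shown in this section, so here is a rough sketch of what an implementation of the new SingleSpillShuffleMapOutputWriter interface could look like (the class name and the destination handling are illustrative assumptions, not part of the patch):

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.StandardCopyOption;

    import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter;

    // Illustrative only: takes the single finished spill file as-is and moves it to
    // wherever this (hypothetical) storage plugin keeps map output. A real
    // implementation would also persist an index derived from partitionLengths,
    // which this sketch omits.
    final class RenamingSingleSpillWriter implements SingleSpillShuffleMapOutputWriter {
      private final File destination;

      RenamingSingleSpillWriter(File destination) {
        this.destination = destination;
      }

      @Override
      public void transferMapSpillFile(File mapOutputFile, long[] partitionLengths)
          throws IOException {
        // The spill already holds all partitions back to back, so no merge pass is
        // needed here.
        Files.move(mapOutputFile.toPath(), destination.toPath(),
            StandardCopyOption.REPLACE_EXISTING);
      }
    }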
- * @throws IOException */ public SpillInfo[] closeAndGetSpills() throws IOException { if (inMemSorter != null) { diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 9d05f03613ce9..d09282e61a9c7 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -17,9 +17,12 @@ package org.apache.spark.shuffle.sort; +import java.nio.channels.Channels; +import java.util.Optional; import javax.annotation.Nullable; import java.io.*; import java.nio.channels.FileChannel; +import java.nio.channels.WritableByteChannel; import java.util.Iterator; import scala.Option; @@ -31,7 +34,6 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.io.ByteStreams; import com.google.common.io.Closeables; -import com.google.common.io.Files; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,8 +43,6 @@ import org.apache.spark.io.CompressionCodec; import org.apache.spark.io.CompressionCodec$; import org.apache.spark.io.NioBufferedFileInputStream; -import org.apache.commons.io.output.CloseShieldOutputStream; -import org.apache.commons.io.output.CountingOutputStream; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.network.util.LimitedInputStream; import org.apache.spark.scheduler.MapStatus; @@ -50,8 +50,12 @@ import org.apache.spark.shuffle.ShuffleWriteMetricsReporter; import org.apache.spark.serializer.SerializationStream; import org.apache.spark.serializer.SerializerInstance; -import org.apache.spark.shuffle.IndexShuffleBlockResolver; import org.apache.spark.shuffle.ShuffleWriter; +import org.apache.spark.shuffle.api.ShuffleExecutorComponents; +import org.apache.spark.shuffle.api.ShuffleMapOutputWriter; +import org.apache.spark.shuffle.api.ShufflePartitionWriter; +import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; +import org.apache.spark.shuffle.api.WritableByteChannelWrapper; import org.apache.spark.storage.BlockManager; import org.apache.spark.storage.TimeTrackingOutputStream; import org.apache.spark.unsafe.Platform; @@ -65,23 +69,21 @@ public class UnsafeShuffleWriter extends ShuffleWriter { private static final ClassTag OBJECT_CLASS_TAG = ClassTag$.MODULE$.Object(); @VisibleForTesting - static final int DEFAULT_INITIAL_SORT_BUFFER_SIZE = 4096; static final int DEFAULT_INITIAL_SER_BUFFER_SIZE = 1024 * 1024; private final BlockManager blockManager; - private final IndexShuffleBlockResolver shuffleBlockResolver; private final TaskMemoryManager memoryManager; private final SerializerInstance serializer; private final Partitioner partitioner; private final ShuffleWriteMetricsReporter writeMetrics; + private final ShuffleExecutorComponents shuffleExecutorComponents; private final int shuffleId; - private final int mapId; + private final long mapId; private final TaskContext taskContext; private final SparkConf sparkConf; private final boolean transferToEnabled; private final int initialSortBufferSize; private final int inputBufferSizeInBytes; - private final int outputBufferSizeInBytes; @Nullable private MapStatus mapStatus; @Nullable private ShuffleExternalSorter sorter; @@ -103,27 +105,15 @@ private static final class MyByteArrayOutputStream extends ByteArrayOutputStream */ private boolean stopping = false; - private class CloseAndFlushShieldOutputStream extends CloseShieldOutputStream { - - 
CloseAndFlushShieldOutputStream(OutputStream outputStream) { - super(outputStream); - } - - @Override - public void flush() { - // do nothing - } - } - public UnsafeShuffleWriter( BlockManager blockManager, - IndexShuffleBlockResolver shuffleBlockResolver, TaskMemoryManager memoryManager, SerializedShuffleHandle handle, - int mapId, + long mapId, TaskContext taskContext, SparkConf sparkConf, - ShuffleWriteMetricsReporter writeMetrics) throws IOException { + ShuffleWriteMetricsReporter writeMetrics, + ShuffleExecutorComponents shuffleExecutorComponents) { final int numPartitions = handle.dependency().partitioner().numPartitions(); if (numPartitions > SortShuffleManager.MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE()) { throw new IllegalArgumentException( @@ -132,7 +122,6 @@ public UnsafeShuffleWriter( " reduce partitions"); } this.blockManager = blockManager; - this.shuffleBlockResolver = shuffleBlockResolver; this.memoryManager = memoryManager; this.mapId = mapId; final ShuffleDependency dep = handle.dependency(); @@ -140,6 +129,7 @@ public UnsafeShuffleWriter( this.serializer = dep.serializer().newInstance(); this.partitioner = dep.partitioner(); this.writeMetrics = writeMetrics; + this.shuffleExecutorComponents = shuffleExecutorComponents; this.taskContext = taskContext; this.sparkConf = sparkConf; this.transferToEnabled = sparkConf.getBoolean("spark.file.transferTo", true); @@ -147,8 +137,6 @@ public UnsafeShuffleWriter( (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_SORT_INIT_BUFFER_SIZE()); this.inputBufferSizeInBytes = (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_FILE_BUFFER_SIZE()) * 1024; - this.outputBufferSizeInBytes = - (int) (long) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024; open(); } @@ -231,25 +219,17 @@ void closeAndWriteOutput() throws IOException { final SpillInfo[] spills = sorter.closeAndGetSpills(); sorter = null; final long[] partitionLengths; - final File output = shuffleBlockResolver.getDataFile(shuffleId, mapId); - final File tmp = Utils.tempFileWith(output); try { - try { - partitionLengths = mergeSpills(spills, tmp); - } finally { - for (SpillInfo spill : spills) { - if (spill.file.exists() && ! spill.file.delete()) { - logger.error("Error while deleting spill file {}", spill.file.getPath()); - } - } - } - shuffleBlockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tmp); + partitionLengths = mergeSpills(spills); } finally { - if (tmp.exists() && !tmp.delete()) { - logger.error("Error while deleting temp file {}", tmp.getAbsolutePath()); + for (SpillInfo spill : spills) { + if (spill.file.exists() && !spill.file.delete()) { + logger.error("Error while deleting spill file {}", spill.file.getPath()); + } } } - mapStatus = MapStatus$.MODULE$.apply(blockManager.shuffleServerId(), partitionLengths); + mapStatus = MapStatus$.MODULE$.apply( + blockManager.shuffleServerId(), partitionLengths, mapId); } @VisibleForTesting @@ -281,137 +261,153 @@ void forceSorterToSpill() throws IOException { * * @return the partition lengths in the merged file. 
*/ - private long[] mergeSpills(SpillInfo[] spills, File outputFile) throws IOException { + private long[] mergeSpills(SpillInfo[] spills) throws IOException { + long[] partitionLengths; + if (spills.length == 0) { + final ShuffleMapOutputWriter mapWriter = shuffleExecutorComponents + .createMapOutputWriter(shuffleId, mapId, partitioner.numPartitions()); + return mapWriter.commitAllPartitions(); + } else if (spills.length == 1) { + Optional maybeSingleFileWriter = + shuffleExecutorComponents.createSingleFileMapOutputWriter(shuffleId, mapId); + if (maybeSingleFileWriter.isPresent()) { + // Here, we don't need to perform any metrics updates because the bytes written to this + // output file would have already been counted as shuffle bytes written. + partitionLengths = spills[0].partitionLengths; + maybeSingleFileWriter.get().transferMapSpillFile(spills[0].file, partitionLengths); + } else { + partitionLengths = mergeSpillsUsingStandardWriter(spills); + } + } else { + partitionLengths = mergeSpillsUsingStandardWriter(spills); + } + return partitionLengths; + } + + private long[] mergeSpillsUsingStandardWriter(SpillInfo[] spills) throws IOException { + long[] partitionLengths; final boolean compressionEnabled = (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_COMPRESS()); final CompressionCodec compressionCodec = CompressionCodec$.MODULE$.createCodec(sparkConf); final boolean fastMergeEnabled = - (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_UNDAFE_FAST_MERGE_ENABLE()); + (boolean) sparkConf.get(package$.MODULE$.SHUFFLE_UNSAFE_FAST_MERGE_ENABLE()); final boolean fastMergeIsSupported = !compressionEnabled || - CompressionCodec$.MODULE$.supportsConcatenationOfSerializedStreams(compressionCodec); + CompressionCodec$.MODULE$.supportsConcatenationOfSerializedStreams(compressionCodec); final boolean encryptionEnabled = blockManager.serializerManager().encryptionEnabled(); + final ShuffleMapOutputWriter mapWriter = shuffleExecutorComponents + .createMapOutputWriter(shuffleId, mapId, partitioner.numPartitions()); try { - if (spills.length == 0) { - new FileOutputStream(outputFile).close(); // Create an empty file - return new long[partitioner.numPartitions()]; - } else if (spills.length == 1) { - // Here, we don't need to perform any metrics updates because the bytes written to this - // output file would have already been counted as shuffle bytes written. - Files.move(spills[0].file, outputFile); - return spills[0].partitionLengths; - } else { - final long[] partitionLengths; - // There are multiple spills to merge, so none of these spill files' lengths were counted - // towards our shuffle write count or shuffle write time. If we use the slow merge path, - // then the final output file's size won't necessarily be equal to the sum of the spill - // files' sizes. To guard against this case, we look at the output file's actual size when - // computing shuffle bytes written. - // - // We allow the individual merge methods to report their own IO times since different merge - // strategies use different IO techniques. We count IO during merge towards the shuffle - // shuffle write time, which appears to be consistent with the "not bypassing merge-sort" - // branch in ExternalSorter. - if (fastMergeEnabled && fastMergeIsSupported) { - // Compression is disabled or we are using an IO compression codec that supports - // decompression of concatenated compressed streams, so we can perform a fast spill merge - // that doesn't need to interpret the spilled bytes. 
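For reference, the choice between the empty, single-spill and standard merge paths in mergeSpills, and between the transferTo-based and stream-based merges below, is steered entirely by pre-existing configuration; a sketch of the relevant settings with their defaults spelled out (the spark.shuffle.unsafe.fastMergeEnabled key is my reading of SHUFFLE_UNSAFE_FAST_MERGE_ENABLE and worth double-checking):

    import org.apache.spark.SparkConf;

    public class MergePathDefaults {
      public static void main(String[] args) {
        // transferTo-based fast merging is used only when the fast-merge flag is on,
        // compression is off or uses a concatenation-friendly codec, I/O encryption
        // is off, and spark.file.transferTo has not been disabled.
        SparkConf conf = new SparkConf()
            .set("spark.shuffle.compress", "true")                 // compression on...
            .set("spark.io.compression.codec", "lz4")              // ...with a codec that supports concatenation
            .set("spark.shuffle.unsafe.fastMergeEnabled", "true")  // allow the fast merge path
            .set("spark.file.transferTo", "true");                 // allow the NIO transferTo variant
        System.out.println(conf.toDebugString());
      }
    }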
- if (transferToEnabled && !encryptionEnabled) { - logger.debug("Using transferTo-based fast merge"); - partitionLengths = mergeSpillsWithTransferTo(spills, outputFile); - } else { - logger.debug("Using fileStream-based fast merge"); - partitionLengths = mergeSpillsWithFileStream(spills, outputFile, null); - } + // There are multiple spills to merge, so none of these spill files' lengths were counted + // towards our shuffle write count or shuffle write time. If we use the slow merge path, + // then the final output file's size won't necessarily be equal to the sum of the spill + // files' sizes. To guard against this case, we look at the output file's actual size when + // computing shuffle bytes written. + // + // We allow the individual merge methods to report their own IO times since different merge + // strategies use different IO techniques. We count IO during merge towards the shuffle + // write time, which appears to be consistent with the "not bypassing merge-sort" branch in + // ExternalSorter. + if (fastMergeEnabled && fastMergeIsSupported) { + // Compression is disabled or we are using an IO compression codec that supports + // decompression of concatenated compressed streams, so we can perform a fast spill merge + // that doesn't need to interpret the spilled bytes. + if (transferToEnabled && !encryptionEnabled) { + logger.debug("Using transferTo-based fast merge"); + mergeSpillsWithTransferTo(spills, mapWriter); } else { - logger.debug("Using slow merge"); - partitionLengths = mergeSpillsWithFileStream(spills, outputFile, compressionCodec); + logger.debug("Using fileStream-based fast merge"); + mergeSpillsWithFileStream(spills, mapWriter, null); } - // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has - // in-memory records, we write out the in-memory records to a file but do not count that - // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs - // to be counted as shuffle write, but this will lead to double-counting of the final - // SpillInfo's bytes. - writeMetrics.decBytesWritten(spills[spills.length - 1].file.length()); - writeMetrics.incBytesWritten(outputFile.length()); - return partitionLengths; + } else { + logger.debug("Using slow merge"); + mergeSpillsWithFileStream(spills, mapWriter, compressionCodec); } - } catch (IOException e) { - if (outputFile.exists() && !outputFile.delete()) { - logger.error("Unable to delete output file {}", outputFile.getPath()); + // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has + // in-memory records, we write out the in-memory records to a file but do not count that + // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs + // to be counted as shuffle write, but this will lead to double-counting of the final + // SpillInfo's bytes. + writeMetrics.decBytesWritten(spills[spills.length - 1].file.length()); + partitionLengths = mapWriter.commitAllPartitions(); + } catch (Exception e) { + try { + mapWriter.abort(e); + } catch (Exception e2) { + logger.warn("Failed to abort writing the map output.", e2); + e.addSuppressed(e2); } throw e; } + return partitionLengths; } /** * Merges spill files using Java FileStreams. 
This code path is typically slower than * the NIO-based merge, {@link UnsafeShuffleWriter#mergeSpillsWithTransferTo(SpillInfo[], - * File)}, and it's mostly used in cases where the IO compression codec does not support - * concatenation of compressed data, when encryption is enabled, or when users have - * explicitly disabled use of {@code transferTo} in order to work around kernel bugs. + * ShuffleMapOutputWriter)}, and it's mostly used in cases where the IO compression codec + * does not support concatenation of compressed data, when encryption is enabled, or when + * users have explicitly disabled use of {@code transferTo} in order to work around kernel bugs. * This code path might also be faster in cases where individual partition size in a spill * is small and UnsafeShuffleWriter#mergeSpillsWithTransferTo method performs many small * disk ios which is inefficient. In those case, Using large buffers for input and output * files helps reducing the number of disk ios, making the file merging faster. * * @param spills the spills to merge. - * @param outputFile the file to write the merged data to. + * @param mapWriter the map output writer to use for output. * @param compressionCodec the IO compression codec, or null if shuffle compression is disabled. * @return the partition lengths in the merged file. */ - private long[] mergeSpillsWithFileStream( + private void mergeSpillsWithFileStream( SpillInfo[] spills, - File outputFile, + ShuffleMapOutputWriter mapWriter, @Nullable CompressionCodec compressionCodec) throws IOException { - assert (spills.length >= 2); final int numPartitions = partitioner.numPartitions(); - final long[] partitionLengths = new long[numPartitions]; final InputStream[] spillInputStreams = new InputStream[spills.length]; - final OutputStream bos = new BufferedOutputStream( - new FileOutputStream(outputFile), - outputBufferSizeInBytes); - // Use a counting output stream to avoid having to close the underlying file and ask - // the file system for its size after each partition is written. - final CountingOutputStream mergedFileOutputStream = new CountingOutputStream(bos); - boolean threwException = true; try { for (int i = 0; i < spills.length; i++) { spillInputStreams[i] = new NioBufferedFileInputStream( - spills[i].file, - inputBufferSizeInBytes); + spills[i].file, + inputBufferSizeInBytes); } for (int partition = 0; partition < numPartitions; partition++) { - final long initialFileLength = mergedFileOutputStream.getByteCount(); - // Shield the underlying output stream from close() and flush() calls, so that we can close - // the higher level streams to make sure all data is really flushed and internal state is - // cleaned. 
- OutputStream partitionOutput = new CloseAndFlushShieldOutputStream( - new TimeTrackingOutputStream(writeMetrics, mergedFileOutputStream)); - partitionOutput = blockManager.serializerManager().wrapForEncryption(partitionOutput); - if (compressionCodec != null) { - partitionOutput = compressionCodec.compressedOutputStream(partitionOutput); - } - for (int i = 0; i < spills.length; i++) { - final long partitionLengthInSpill = spills[i].partitionLengths[partition]; - if (partitionLengthInSpill > 0) { - InputStream partitionInputStream = new LimitedInputStream(spillInputStreams[i], - partitionLengthInSpill, false); - try { - partitionInputStream = blockManager.serializerManager().wrapForEncryption( - partitionInputStream); - if (compressionCodec != null) { - partitionInputStream = compressionCodec.compressedInputStream(partitionInputStream); + boolean copyThrewException = true; + ShufflePartitionWriter writer = mapWriter.getPartitionWriter(partition); + OutputStream partitionOutput = writer.openStream(); + try { + partitionOutput = new TimeTrackingOutputStream(writeMetrics, partitionOutput); + partitionOutput = blockManager.serializerManager().wrapForEncryption(partitionOutput); + if (compressionCodec != null) { + partitionOutput = compressionCodec.compressedOutputStream(partitionOutput); + } + for (int i = 0; i < spills.length; i++) { + final long partitionLengthInSpill = spills[i].partitionLengths[partition]; + + if (partitionLengthInSpill > 0) { + InputStream partitionInputStream = null; + boolean copySpillThrewException = true; + try { + partitionInputStream = new LimitedInputStream(spillInputStreams[i], + partitionLengthInSpill, false); + partitionInputStream = blockManager.serializerManager().wrapForEncryption( + partitionInputStream); + if (compressionCodec != null) { + partitionInputStream = compressionCodec.compressedInputStream( + partitionInputStream); + } + ByteStreams.copy(partitionInputStream, partitionOutput); + copySpillThrewException = false; + } finally { + Closeables.close(partitionInputStream, copySpillThrewException); } - ByteStreams.copy(partitionInputStream, partitionOutput); - } finally { - partitionInputStream.close(); } } + copyThrewException = false; + } finally { + Closeables.close(partitionOutput, copyThrewException); } - partitionOutput.flush(); - partitionOutput.close(); - partitionLengths[partition] = (mergedFileOutputStream.getByteCount() - initialFileLength); + long numBytesWritten = writer.getNumBytesWritten(); + writeMetrics.incBytesWritten(numBytesWritten); } threwException = false; } finally { @@ -420,9 +416,7 @@ private long[] mergeSpillsWithFileStream( for (InputStream stream : spillInputStreams) { Closeables.close(stream, threwException); } - Closeables.close(mergedFileOutputStream, threwException); } - return partitionLengths; } /** @@ -430,54 +424,46 @@ private long[] mergeSpillsWithFileStream( * This is only safe when the IO compression codec and serializer support concatenation of * serialized streams. * + * @param spills the spills to merge. + * @param mapWriter the map output writer to use for output. * @return the partition lengths in the merged file. 
*/ - private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) throws IOException { - assert (spills.length >= 2); + private void mergeSpillsWithTransferTo( + SpillInfo[] spills, + ShuffleMapOutputWriter mapWriter) throws IOException { final int numPartitions = partitioner.numPartitions(); - final long[] partitionLengths = new long[numPartitions]; final FileChannel[] spillInputChannels = new FileChannel[spills.length]; final long[] spillInputChannelPositions = new long[spills.length]; - FileChannel mergedFileOutputChannel = null; boolean threwException = true; try { for (int i = 0; i < spills.length; i++) { spillInputChannels[i] = new FileInputStream(spills[i].file).getChannel(); } - // This file needs to opened in append mode in order to work around a Linux kernel bug that - // affects transferTo; see SPARK-3948 for more details. - mergedFileOutputChannel = new FileOutputStream(outputFile, true).getChannel(); - - long bytesWrittenToMergedFile = 0; for (int partition = 0; partition < numPartitions; partition++) { - for (int i = 0; i < spills.length; i++) { - final long partitionLengthInSpill = spills[i].partitionLengths[partition]; - final FileChannel spillInputChannel = spillInputChannels[i]; - final long writeStartTime = System.nanoTime(); - Utils.copyFileStreamNIO( - spillInputChannel, - mergedFileOutputChannel, - spillInputChannelPositions[i], - partitionLengthInSpill); - spillInputChannelPositions[i] += partitionLengthInSpill; - writeMetrics.incWriteTime(System.nanoTime() - writeStartTime); - bytesWrittenToMergedFile += partitionLengthInSpill; - partitionLengths[partition] += partitionLengthInSpill; + boolean copyThrewException = true; + ShufflePartitionWriter writer = mapWriter.getPartitionWriter(partition); + WritableByteChannelWrapper resolvedChannel = writer.openChannelWrapper() + .orElseGet(() -> new StreamFallbackChannelWrapper(openStreamUnchecked(writer))); + try { + for (int i = 0; i < spills.length; i++) { + long partitionLengthInSpill = spills[i].partitionLengths[partition]; + final FileChannel spillInputChannel = spillInputChannels[i]; + final long writeStartTime = System.nanoTime(); + Utils.copyFileStreamNIO( + spillInputChannel, + resolvedChannel.channel(), + spillInputChannelPositions[i], + partitionLengthInSpill); + copyThrewException = false; + spillInputChannelPositions[i] += partitionLengthInSpill; + writeMetrics.incWriteTime(System.nanoTime() - writeStartTime); + } + } finally { + Closeables.close(resolvedChannel, copyThrewException); } - } - // Check the position after transferTo loop to see if it is in the right position and raise an - // exception if it is incorrect. The position will not be increased to the expected length - // after calling transferTo in kernel version 2.6.32. This issue is described at - // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. - if (mergedFileOutputChannel.position() != bytesWrittenToMergedFile) { - throw new IOException( - "Current position " + mergedFileOutputChannel.position() + " does not equal expected " + - "position " + bytesWrittenToMergedFile + " after transferTo. Please check your kernel" + - " version to see if it is 2.6.32, as there is a kernel bug which will lead to " + - "unexpected behavior when using transferTo. You can set spark.file.transferTo=false " + - "to disable this NIO feature." 
- ); + long numBytes = writer.getNumBytesWritten(); + writeMetrics.incBytesWritten(numBytes); } threwException = false; } finally { @@ -487,9 +473,7 @@ private long[] mergeSpillsWithTransferTo(SpillInfo[] spills, File outputFile) th assert(spillInputChannelPositions[i] == spills[i].file.length()); Closeables.close(spillInputChannels[i], threwException); } - Closeables.close(mergedFileOutputChannel, threwException); } - return partitionLengths; } @Override @@ -518,4 +502,30 @@ public Option stop(boolean success) { } } } + + private static OutputStream openStreamUnchecked(ShufflePartitionWriter writer) { + try { + return writer.openStream(); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private static final class StreamFallbackChannelWrapper implements WritableByteChannelWrapper { + private final WritableByteChannel channel; + + StreamFallbackChannelWrapper(OutputStream fallbackStream) { + this.channel = Channels.newChannel(fallbackStream); + } + + @Override + public WritableByteChannel channel() { + return channel; + } + + @Override + public void close() throws IOException { + channel.close(); + } + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java index 02eb710737285..a0c7d3c248d48 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java @@ -17,6 +17,8 @@ package org.apache.spark.shuffle.sort.io; +import java.util.Optional; + import com.google.common.annotations.VisibleForTesting; import org.apache.spark.SparkConf; @@ -24,6 +26,7 @@ import org.apache.spark.shuffle.api.ShuffleExecutorComponents; import org.apache.spark.shuffle.api.ShuffleMapOutputWriter; import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; import org.apache.spark.storage.BlockManager; public class LocalDiskShuffleExecutorComponents implements ShuffleExecutorComponents { @@ -58,8 +61,7 @@ public void initializeExecutor(String appId, String execId) { @Override public ShuffleMapOutputWriter createMapOutputWriter( int shuffleId, - int mapId, - long mapTaskAttemptId, + long mapId, int numPartitions) { if (blockResolver == null) { throw new IllegalStateException( @@ -68,4 +70,15 @@ public ShuffleMapOutputWriter createMapOutputWriter( return new LocalDiskShuffleMapOutputWriter( shuffleId, mapId, numPartitions, blockResolver, sparkConf); } + + @Override + public Optional createSingleFileMapOutputWriter( + int shuffleId, + long mapId) { + if (blockResolver == null) { + throw new IllegalStateException( + "Executor components must be initialized before getting writers."); + } + return Optional.of(new LocalDiskSingleSpillMapOutputWriter(shuffleId, mapId, blockResolver)); + } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java index 7fc19b1270a46..a6529fd76188a 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java @@ -24,8 +24,8 @@ import java.io.OutputStream; import java.nio.channels.FileChannel; import java.nio.channels.WritableByteChannel; - import 
java.util.Optional; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,12 +48,13 @@ public class LocalDiskShuffleMapOutputWriter implements ShuffleMapOutputWriter { LoggerFactory.getLogger(LocalDiskShuffleMapOutputWriter.class); private final int shuffleId; - private final int mapId; + private final long mapId; private final IndexShuffleBlockResolver blockResolver; private final long[] partitionLengths; private final int bufferSize; private int lastPartitionId = -1; private long currChannelPosition; + private long bytesWrittenToMergedFile = 0L; private final File outputFile; private File outputTempFile; @@ -63,7 +64,7 @@ public class LocalDiskShuffleMapOutputWriter implements ShuffleMapOutputWriter { public LocalDiskShuffleMapOutputWriter( int shuffleId, - int mapId, + long mapId, int numPartitions, IndexShuffleBlockResolver blockResolver, SparkConf sparkConf) { @@ -97,6 +98,18 @@ public ShufflePartitionWriter getPartitionWriter(int reducePartitionId) throws I @Override public long[] commitAllPartitions() throws IOException { + // Check the position after the transferTo loop to see if it is in the right position and raise an + // exception if it is incorrect. The position will not be increased to the expected length + // after calling transferTo in kernel version 2.6.32. This issue is described at + // https://bugs.openjdk.java.net/browse/JDK-7052359 and SPARK-3948. + if (outputFileChannel != null && outputFileChannel.position() != bytesWrittenToMergedFile) { + throw new IOException( + "Current position " + outputFileChannel.position() + " does not equal expected " + + "position " + bytesWrittenToMergedFile + " after transferTo. Please check your " + + "kernel version to see if it is 2.6.32, as there is a kernel bug which will lead " + + "to unexpected behavior when using transferTo. You can set " + + "spark.file.transferTo=false to disable this NIO feature."); + } cleanUp(); File resolvedTmp = outputTempFile != null && outputTempFile.isFile() ? outputTempFile : null; blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, resolvedTmp); @@ -133,11 +146,10 @@ private void initStream() throws IOException { } private void initChannel() throws IOException { - if (outputFileStream == null) { - outputFileStream = new FileOutputStream(outputTempFile, true); - } + // This file needs to be opened in append mode in order to work around a Linux kernel bug that + // affects transferTo; see SPARK-3948 for more details.
if (outputFileChannel == null) { - outputFileChannel = outputFileStream.getChannel(); + outputFileChannel = new FileOutputStream(outputTempFile, true).getChannel(); } } @@ -227,6 +239,7 @@ public void write(byte[] buf, int pos, int length) throws IOException { public void close() { isClosed = true; partitionLengths[partitionId] = count; + bytesWrittenToMergedFile += count; } private void verifyNotClosed() { @@ -257,6 +270,7 @@ public WritableByteChannel channel() { @Override public void close() throws IOException { partitionLengths[partitionId] = getCount(); + bytesWrittenToMergedFile += partitionLengths[partitionId]; } } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java new file mode 100644 index 0000000000000..c8b41992a8919 --- /dev/null +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskSingleSpillMapOutputWriter.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.shuffle.sort.io; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; + +import org.apache.spark.shuffle.IndexShuffleBlockResolver; +import org.apache.spark.shuffle.api.SingleSpillShuffleMapOutputWriter; +import org.apache.spark.util.Utils; + +public class LocalDiskSingleSpillMapOutputWriter + implements SingleSpillShuffleMapOutputWriter { + + private final int shuffleId; + private final long mapId; + private final IndexShuffleBlockResolver blockResolver; + + public LocalDiskSingleSpillMapOutputWriter( + int shuffleId, + long mapId, + IndexShuffleBlockResolver blockResolver) { + this.shuffleId = shuffleId; + this.mapId = mapId; + this.blockResolver = blockResolver; + } + + @Override + public void transferMapSpillFile( + File mapSpillFile, + long[] partitionLengths) throws IOException { + // The map spill file already has the proper format, and it contains all of the partition data. + // So just transfer it directly to the destination without any merging. 
+ File outputFile = blockResolver.getDataFile(shuffleId, mapId); + File tempFile = Utils.tempFileWith(outputFile); + Files.move(mapSpillFile.toPath(), tempFile.toPath()); + blockResolver.writeIndexFileAndCommit(shuffleId, mapId, partitionLengths, tempFile); + } +} diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index d320ba3139541..b15365fe54ad6 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -886,6 +886,7 @@ public void reset() { numKeys = 0; numValues = 0; freeArray(longArray); + longArray = null; while (dataPages.size() > 0) { MemoryBlock dataPage = dataPages.removeLast(); freePage(dataPage); diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 1b206c11d9a8e..55e4e609c3c7b 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -447,8 +447,6 @@ public void insertKVRecord(Object keyBase, long keyOffset, int keyLen, /** * Merges another UnsafeExternalSorters into this one, the other one will be emptied. - * - * @throws IOException */ public void merge(UnsafeExternalSorter other) throws IOException { other.spill(); diff --git a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js index 3ef1a76fd7202..b28c981da20a5 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/stagepage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/stagepage.js @@ -286,7 +286,7 @@ $(document).ready(function () { " Show Additional Metrics" + "" + " - You may also provide a "message handler function" that takes a Kinesis `Record` and returns a generic object `T`, in case you would like to use other data included in a `Record` such as partition key. This is currently only supported in Scala and Java. + You may also provide the following settings. These are currently only supported in Scala and Java. + + - A "message handler function" that takes a Kinesis `Record` and returns a generic object `T`, in case you would like to use other data included in a `Record` such as partition key. + + - CloudWatch metrics level and dimensions. See [the AWS documentation about monitoring KCL](https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-kcl.html) for details.
+ import collection.JavaConverters._ import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.kinesis.KinesisInputDStream import org.apache.spark.streaming.{Seconds, StreamingContext} import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream + import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration + import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel val kinesisStream = KinesisInputDStream.builder .streamingContext(streamingContext) @@ -116,17 +123,22 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m .checkpointAppName([Kinesis app name]) .checkpointInterval([checkpoint interval]) .storageLevel(StorageLevel.MEMORY_AND_DISK_2) + .metricsLevel(MetricsLevel.DETAILED) + .metricsEnabledDimensions(KinesisClientLibConfiguration.DEFAULT_METRICS_ENABLED_DIMENSIONS.asScala.toSet) .buildWithMessageHandler([message handler])
- import org.apache.spark.storage.StorageLevel - import org.apache.spark.streaming.kinesis.KinesisInputDStream - import org.apache.spark.streaming.Seconds - import org.apache.spark.streaming.StreamingContext - import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream - - KinesisInputDStream kinesisStream = KinesisInputDStream.builder + import org.apache.spark.storage.StorageLevel; + import org.apache.spark.streaming.kinesis.KinesisInputDStream; + import org.apache.spark.streaming.Seconds; + import org.apache.spark.streaming.StreamingContext; + import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; + import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration; + import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel; + import scala.collection.JavaConverters; + + KinesisInputDStream kinesisStream = KinesisInputDStream.builder() .streamingContext(streamingContext) .endpointUrl([endpoint URL]) .regionName([region name]) @@ -135,6 +147,8 @@ A Kinesis stream can be set up at one of the valid Kinesis endpoints with 1 or m .checkpointAppName([Kinesis app name]) .checkpointInterval([checkpoint interval]) .storageLevel(StorageLevel.MEMORY_AND_DISK_2) + .metricsLevel(MetricsLevel.DETAILED) + .metricsEnabledDimensions(JavaConverters.asScalaSetConverter(KinesisClientLibConfiguration.DEFAULT_METRICS_ENABLED_DIMENSIONS).asScala().toSet()) .buildWithMessageHandler([message handler]);
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index f5abed74bff20..f6b579fbf74d1 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -2488,13 +2488,13 @@ additional effort may be necessary to achieve exactly-once semantics. There are * [StreamingContext](api/scala/index.html#org.apache.spark.streaming.StreamingContext) and [DStream](api/scala/index.html#org.apache.spark.streaming.dstream.DStream) * [KafkaUtils](api/scala/index.html#org.apache.spark.streaming.kafka.KafkaUtils$), - [KinesisUtils](api/scala/index.html#org.apache.spark.streaming.kinesis.KinesisUtils$), + [KinesisUtils](api/scala/index.html#org.apache.spark.streaming.kinesis.KinesisInputDStream), - Java docs * [JavaStreamingContext](api/java/index.html?org/apache/spark/streaming/api/java/JavaStreamingContext.html), [JavaDStream](api/java/index.html?org/apache/spark/streaming/api/java/JavaDStream.html) and [JavaPairDStream](api/java/index.html?org/apache/spark/streaming/api/java/JavaPairDStream.html) * [KafkaUtils](api/java/index.html?org/apache/spark/streaming/kafka/KafkaUtils.html), - [KinesisUtils](api/java/index.html?org/apache/spark/streaming/kinesis/KinesisUtils.html) + [KinesisUtils](api/java/index.html?org/apache/spark/streaming/kinesis/KinesisInputDStream.html) - Python docs * [StreamingContext](api/python/pyspark.streaming.html#pyspark.streaming.StreamingContext) and [DStream](api/python/pyspark.streaming.html#pyspark.streaming.DStream) * [KafkaUtils](api/python/pyspark.streaming.html#pyspark.streaming.kafka.KafkaUtils) diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index c4378b4a02663..89732d309aa27 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -27,6 +27,8 @@ For Scala/Java applications using SBT/Maven project definitions, link your appli artifactId = spark-sql-kafka-0-10_{{site.SCALA_BINARY_VERSION}} version = {{site.SPARK_VERSION_SHORT}} +Please note that to use the headers functionality, your Kafka client version should be version 0.11.0.0 or up. + For Python applications, you need to add this above library and its dependencies when deploying your application. See the [Deploying](#deploying) subsection below. 
@@ -50,6 +52,17 @@ val df = spark df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] +// Subscribe to 1 topic, with headers +val df = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .option("includeHeaders", "true") + .load() +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers") + .as[(String, String, Map)] + // Subscribe to multiple topics val df = spark .readStream @@ -84,6 +97,16 @@ Dataset df = spark .load(); df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); +// Subscribe to 1 topic, with headers +Dataset df = spark + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .option("includeHeaders", "true") + .load() +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers"); + // Subscribe to multiple topics Dataset df = spark .readStream() @@ -116,6 +139,16 @@ df = spark \ .load() df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") +# Subscribe to 1 topic, with headers +val df = spark \ + .readStream \ + .format("kafka") \ + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") \ + .option("subscribe", "topic1") \ + .option("includeHeaders", "true") \ + .load() +df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers") + # Subscribe to multiple topics df = spark \ .readStream \ @@ -286,6 +319,10 @@ Each row in the source has the following schema: timestampType int + + headers (optional) + array + The following options must be set for the Kafka source @@ -325,6 +362,27 @@ The following configurations are optional: + + + + + + + + + + + + + + + + + + + + +
Option | value | default | query type | meaning
startingOffsetsByTimestamp | json string + """ {"topicA":{"0": 1000, "1": 1000}, "topicB": {"0": 2000, "1": 2000}} """ + none (the value of startingOffsets will apply) | streaming and batch | The start point in time when a query is started, a json string specifying a starting timestamp for + each TopicPartition. The returned offset for each partition is the earliest offset whose timestamp is greater than or + equal to the given timestamp in the corresponding partition. If the matched offset doesn't exist, + the query will fail immediately to prevent unintended reads from such a partition. (This is a limitation for now, and will be addressed in the near future.) See the example sketch after this options table.

+

+ Spark simply passes the timestamp information to KafkaConsumer.offsetsForTimes, and doesn't interpret or reason about the value.

+ For more details on KafkaConsumer.offsetsForTimes, please refer to its javadoc.

+ Also, the meaning of the timestamp here can vary according to the Kafka configuration (log.message.timestamp.type); please refer to the Kafka documentation for further details.

+ Note: This option requires Kafka 0.10.1.0 or higher.

+ Note2: startingOffsetsByTimestamp takes precedence over startingOffsets.

+ Note3: For streaming queries, this only applies when a new query is started; resuming will + always pick up from where the query left off. Newly discovered partitions during a query will start at + earliest.

startingOffsets "earliest", "latest" (streaming only), or json string @@ -340,6 +398,25 @@ The following configurations are optional: always pick up from where the query left off. Newly discovered partitions during a query will start at earliest.
endingOffsetsByTimestamp | json string + """ {"topicA":{"0": 1000, "1": 1000}, "topicB": {"0": 2000, "1": 2000}} """ + latest | batch query | The end point when a batch query is ended, a json string specifying an ending timestamp for each TopicPartition. + The returned offset for each partition is the earliest offset whose timestamp is greater than or equal to + the given timestamp in the corresponding partition. If the matched offset doesn't exist, the offset will + be set to latest.

+

+ Spark simply passes the timestamp information to KafkaConsumer.offsetsForTimes, and doesn't interpret or reason about the value.

+ For more details on KafkaConsumer.offsetsForTimes, please refer to its javadoc.

+ Also, the meaning of the timestamp here can vary according to the Kafka configuration (log.message.timestamp.type); please refer to the Kafka documentation for further details.

+ Note: This option requires Kafka 0.10.1.0 or higher.

+ Note2: endingOffsetsByTimestamp takes precedence over endingOffsets. +

endingOffsets latest or json string @@ -425,6 +502,13 @@ The following configurations are optional: issues, set the Kafka consumer session timeout (by setting option "kafka.session.timeout.ms") to be very small. When this is set, option "groupIdPrefix" will be ignored.
includeHeaders | boolean | false | streaming and batch | Whether to include the Kafka headers in the row.
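To make the options above concrete, here is a minimal sketch of a batch read that combines the new timestamp-based offset options with includeHeaders. It only reuses the option names and the json format shown in this table, following the same builder pattern as the examples earlier in this file; the bootstrap servers, topic name, partition ids, and timestamp values are placeholders.

    // Batch query bounded by per-partition timestamps, exposing record headers.
    // Topic, servers, and timestamps below are illustrative only.
    val df = spark
      .read
      .format("kafka")
      .option("kafka.bootstrap.servers", "host1:port1,host2:port2")
      .option("subscribe", "topicA")
      // start from the earliest offsets whose timestamps are >= the given per-partition timestamps
      .option("startingOffsetsByTimestamp", """{"topicA":{"0": 1000, "1": 1000}}""")
      // batch queries may additionally bound the read with per-partition ending timestamps
      .option("endingOffsetsByTimestamp", """{"topicA":{"0": 2000, "1": 2000}}""")
      // expose Kafka record headers as an extra column
      .option("includeHeaders", "true")
      .load()
    df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers")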
### Consumer Caching @@ -522,6 +606,10 @@ The Dataframe being written to Kafka should have the following columns in schema value (required) string or binary + + headers (optional) + array + topic (*optional) string @@ -559,6 +647,13 @@ The following configurations are optional: Sets the topic that all rows will be written to in Kafka. This option overrides any topic column that may exist in the data. + + includeHeaders + boolean + false + streaming and batch + Whether to include the Kafka headers in the row. + ### Creating a Kafka Sink for Streaming Queries @@ -825,7 +920,9 @@ Delegation tokens can be obtained from multiple clusters and ${cluster}spark.kafka.clusters.${cluster}.security.protocol SASL_SSL - Protocol used to communicate with brokers. For further details please see Kafka documentation. Only used to obtain delegation token. + Protocol used to communicate with brokers. For further details please see Kafka documentation. Protocol is applied on all the sources and sinks as default where + bootstrap.servers config matches (for further details please see spark.kafka.clusters.${cluster}.target.bootstrap.servers.regex), + and can be overridden by setting kafka.security.protocol on the source or sink. diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index deaf262c5f572..2a405f36fd5fd 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -1505,7 +1505,6 @@ Additional details on supported joins: - Cannot use mapGroupsWithState and flatMapGroupsWithState in Update mode before joins. - ### Streaming Deduplication You can deduplicate records in data streams using a unique identifier in the events. This is exactly same as deduplication on static using a unique identifier column. The query will store the necessary amount of data from previous records such that it can filter duplicate records. Similar to aggregations, you can use deduplication with or without watermarking. @@ -1616,6 +1615,8 @@ this configuration judiciously. ### Arbitrary Stateful Operations Many usecases require more advanced stateful operations than aggregations. For example, in many usecases, you have to track sessions from data streams of events. For doing such sessionization, you will have to save arbitrary types of data as state, and perform arbitrary operations on the state using the data stream events in every trigger. Since Spark 2.2, this can be done using the operation `mapGroupsWithState` and the more powerful operation `flatMapGroupsWithState`. Both operations allow you to apply user-defined code on grouped Datasets to update user-defined state. For more concrete details, take a look at the API documentation ([Scala](api/scala/index.html#org.apache.spark.sql.streaming.GroupState)/[Java](api/java/org/apache/spark/sql/streaming/GroupState.html)) and the examples ([Scala]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredSessionization.scala)/[Java]({{site.SPARK_GITHUB_URL}}/blob/v{{site.SPARK_VERSION_SHORT}}/examples/src/main/java/org/apache/spark/examples/sql/streaming/JavaStructuredSessionization.java)). +Though Spark cannot check and force it, the state function should be implemented with respect to the semantics of the output mode. 
For example, in Update mode Spark doesn't expect that the state function will emit rows which are older than current watermark plus allowed late record delay, whereas in Append mode the state function can emit these rows. + ### Unsupported Operations There are a few DataFrame/Dataset operations that are not supported with streaming DataFrames/Datasets. Some of them are as follows. @@ -1647,6 +1648,26 @@ For example, sorting on the input stream is not supported, as it requires keepin track of all the data received in the stream. This is therefore fundamentally hard to execute efficiently. +### Limitation of global watermark + +In Append mode, if a stateful operation emits rows older than current watermark plus allowed late record delay, +they will be "late rows" in downstream stateful operations (as Spark uses global watermark). Note that these rows may be discarded. +This is a limitation of a global watermark, and it could potentially cause a correctness issue. + +Spark will check the logical plan of query and log a warning when Spark detects such a pattern. + +Any of the stateful operation(s) after any of below stateful operations can have this issue: + +* streaming aggregation in Append mode +* stream-stream outer join +* `mapGroupsWithState` and `flatMapGroupsWithState` in Append mode (depending on the implementation of the state function) + +As Spark cannot check the state function of `mapGroupsWithState`/`flatMapGroupsWithState`, Spark assumes that the state function +emits late rows if the operator uses Append mode. + +There's a known workaround: split your streaming query into multiple queries per stateful operator, and ensure +end-to-end exactly once per query. Ensuring end-to-end exactly once for the last query is optional. + ## Starting Streaming Queries Once you have defined the final result DataFrame/Dataset, all that is left is for you to start the streaming computation. To do that, you have to use the `DataStreamWriter` ([Scala](api/scala/index.html#org.apache.spark.sql.streaming.DataStreamWriter)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/pyspark.sql.html#pyspark.sql.streaming.DataStreamWriter) docs) diff --git a/docs/web-ui.md b/docs/web-ui.md index 72423d9468e83..e6025370e6796 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -404,3 +404,44 @@ The web UI includes a Streaming tab if the application uses Spark streaming. Thi scheduling delay and processing time for each micro-batch in the data stream, which can be useful for troubleshooting the streaming application. +## JDBC/ODBC Server Tab +We can see this tab when Spark is running as a [distributed SQL engine](sql-distributed-sql-engine.html). It shows information about sessions and submitted SQL operations. + +The first section of the page displays general information about the JDBC/ODBC server: start time and uptime. + +

+ JDBC/ODBC Header +

+ +The second section contains information about active and finished sessions. +* **User** and **IP** of the connection. +* **Session id** link to access the session info. +* **Start time**, **finish time** and **duration** of the session. +* **Total execute** is the number of operations submitted in this session. + +

+ JDBC/ODBC sessions +

+ +The third section has the SQL statistics of the submitted operations. +* **User** that submitted the operation. +* **Job id** link to [jobs tab](web-ui.html#jobs-tab). +* **Group id** of the query that groups all jobs together. An application can cancel all running jobs using this group id. +* **Start time** of the operation. +* **Finish time** of the execution, before fetching the results. +* **Close time** of the operation after fetching the results. +* **Execution time** is the difference between finish time and start time. +* **Duration time** is the difference between close time and start time. +* **Statement** is the operation being executed. +* **State** of the process. + * _Started_, first state, when the process begins. + * _Compiled_, execution plan generated. + * _Failed_, final state when the execution failed or finished with an error. + * _Canceled_, final state when the execution is canceled. + * _Finished_, processing finished and waiting to fetch results. + * _Closed_, final state when the client closed the statement. +* **Detail** of the execution plan with parsed logical plan, analyzed logical plan, optimized logical plan and physical plan or errors in the SQL statement. + +

+ JDBC/ODBC SQL Statistics +

diff --git a/examples/pom.xml b/examples/pom.xml index ac148ef4c9c01..a099f1e042e99 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -107,7 +107,7 @@ com.github.scopt scopt_${scala.binary.version} - 3.7.0 + 3.7.1 ${hive.parquet.group} diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java deleted file mode 100644 index 324a781c1a44a..0000000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaLinearRegressionWithSGDExample.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.examples.mllib; - -import org.apache.spark.SparkConf; -import org.apache.spark.api.java.JavaSparkContext; - -// $example on$ -import scala.Tuple2; - -import org.apache.spark.api.java.JavaPairRDD; -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.mllib.regression.LinearRegressionModel; -import org.apache.spark.mllib.regression.LinearRegressionWithSGD; -// $example off$ - -/** - * Example for LinearRegressionWithSGD. 
- */ -public class JavaLinearRegressionWithSGDExample { - public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("JavaLinearRegressionWithSGDExample"); - JavaSparkContext sc = new JavaSparkContext(conf); - - // $example on$ - // Load and parse the data - String path = "data/mllib/ridge-data/lpsa.data"; - JavaRDD data = sc.textFile(path); - JavaRDD parsedData = data.map(line -> { - String[] parts = line.split(","); - String[] features = parts[1].split(" "); - double[] v = new double[features.length]; - for (int i = 0; i < features.length - 1; i++) { - v[i] = Double.parseDouble(features[i]); - } - return new LabeledPoint(Double.parseDouble(parts[0]), Vectors.dense(v)); - }); - parsedData.cache(); - - // Building the model - int numIterations = 100; - double stepSize = 0.00000001; - LinearRegressionModel model = - LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData), numIterations, stepSize); - - // Evaluate model on training examples and compute training error - JavaPairRDD valuesAndPreds = parsedData.mapToPair(point -> - new Tuple2<>(model.predict(point.features()), point.label())); - - double MSE = valuesAndPreds.mapToDouble(pair -> { - double diff = pair._1() - pair._2(); - return diff * diff; - }).mean(); - System.out.println("training Mean Squared Error = " + MSE); - - // Save and load model - model.save(sc.sc(), "target/tmp/javaLinearRegressionWithSGDModel"); - LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(), - "target/tmp/javaLinearRegressionWithSGDModel"); - // $example off$ - - sc.stop(); - } -} diff --git a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java b/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java deleted file mode 100644 index 00033b5730a3d..0000000000000 --- a/examples/src/main/java/org/apache/spark/examples/mllib/JavaRegressionMetricsExample.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.examples.mllib; - -// $example on$ -import scala.Tuple2; - -import org.apache.spark.api.java.*; -import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.mllib.regression.LinearRegressionModel; -import org.apache.spark.mllib.regression.LinearRegressionWithSGD; -import org.apache.spark.mllib.evaluation.RegressionMetrics; -import org.apache.spark.SparkConf; -// $example off$ - -public class JavaRegressionMetricsExample { - public static void main(String[] args) { - SparkConf conf = new SparkConf().setAppName("Java Regression Metrics Example"); - JavaSparkContext sc = new JavaSparkContext(conf); - // $example on$ - // Load and parse the data - String path = "data/mllib/sample_linear_regression_data.txt"; - JavaRDD data = sc.textFile(path); - JavaRDD parsedData = data.map(line -> { - String[] parts = line.split(" "); - double[] v = new double[parts.length - 1]; - for (int i = 1; i < parts.length; i++) { - v[i - 1] = Double.parseDouble(parts[i].split(":")[1]); - } - return new LabeledPoint(Double.parseDouble(parts[0]), Vectors.dense(v)); - }); - parsedData.cache(); - - // Building the model - int numIterations = 100; - LinearRegressionModel model = LinearRegressionWithSGD.train(JavaRDD.toRDD(parsedData), - numIterations); - - // Evaluate model on training examples and compute training error - JavaPairRDD valuesAndPreds = parsedData.mapToPair(point -> - new Tuple2<>(model.predict(point.features()), point.label())); - - // Instantiate metrics object - RegressionMetrics metrics = new RegressionMetrics(valuesAndPreds.rdd()); - - // Squared error - System.out.format("MSE = %f\n", metrics.meanSquaredError()); - System.out.format("RMSE = %f\n", metrics.rootMeanSquaredError()); - - // R-squared - System.out.format("R Squared = %f\n", metrics.r2()); - - // Mean absolute error - System.out.format("MAE = %f\n", metrics.meanAbsoluteError()); - - // Explained variance - System.out.format("Explained Variance = %f\n", metrics.explainedVariance()); - - // Save and load model - model.save(sc.sc(), "target/tmp/LogisticRegressionModel"); - LinearRegressionModel sameModel = LinearRegressionModel.load(sc.sc(), - "target/tmp/LogisticRegressionModel"); - // $example off$ - - sc.stop(); - } -} diff --git a/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala b/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala index 5d9a9a73f12ec..36da10568989d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/AccumulatorMetricsTest.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.SparkSession * accumulator source) are reported to stdout as well. 
*/ object AccumulatorMetricsTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder() diff --git a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala index 3311de12dbd97..d7e79966037cc 100644 --- a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.SparkSession * Usage: BroadcastTest [partitions] [numElem] [blockSize] */ object BroadcastTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val blockSize = if (args.length > 2) args(2) else "4096" diff --git a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala index d12ef642bd2cd..ed56108f4b624 100644 --- a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala @@ -27,7 +27,7 @@ import org.apache.spark.util.Utils * test driver submission in the standalone scheduler. */ object DriverSubmissionTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { println("Usage: DriverSubmissionTest ") System.exit(0) diff --git a/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala index 45c4953a84be2..6e95318a8cbc0 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ExceptionHandlingTest.scala @@ -20,7 +20,7 @@ package org.apache.spark.examples import org.apache.spark.sql.SparkSession object ExceptionHandlingTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("ExceptionHandlingTest") diff --git a/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala index 2f2bbb1275438..c07c1afbcb174 100644 --- a/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/GroupByTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession * Usage: GroupByTest [numMappers] [numKVPairs] [KeySize] [numReducers] */ object GroupByTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("GroupBy Test") diff --git a/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala index b327e13533b81..48698678571e3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/HdfsTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession object HdfsTest { /** Usage: HdfsTest [file] */ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: HdfsTest ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala index 3f9cea35d6503..87c2f6853807a 100644 --- 
a/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalALS.scala @@ -93,7 +93,7 @@ object LocalALS { new CholeskyDecomposition(XtX).getSolver.solve(Xty) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of ALS and is given as an example! |Please use org.apache.spark.ml.recommendation.ALS @@ -101,7 +101,7 @@ object LocalALS { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { args match { case Array(m, u, f, iters) => diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala index 5512e33e41ac3..5478c585a959e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalFileLR.scala @@ -39,7 +39,7 @@ object LocalFileLR { DataPoint(new DenseVector(nums.slice(1, D + 1)), nums(0)) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of Logistic Regression and is given as an example! |Please use org.apache.spark.ml.classification.LogisticRegression @@ -47,7 +47,7 @@ object LocalFileLR { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { showWarning() diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala index f5162a59522f0..4a73466841f69 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalKMeans.scala @@ -62,7 +62,7 @@ object LocalKMeans { bestIndex } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use org.apache.spark.ml.clustering.KMeans @@ -70,7 +70,7 @@ object LocalKMeans { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { showWarning() diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index bde8ccd305960..4ca0ecdcfe6e0 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -46,7 +46,7 @@ object LocalLR { Array.tabulate(N)(generatePoint) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of Logistic Regression and is given as an example! 
|Please use org.apache.spark.ml.classification.LogisticRegression @@ -54,7 +54,7 @@ object LocalLR { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { showWarning() diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala index a93c15c85cfc1..7660ffd02ed9b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalPi.scala @@ -21,7 +21,7 @@ package org.apache.spark.examples import scala.math.random object LocalPi { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { var count = 0 for (i <- 1 to 100000) { val x = random * 2 - 1 diff --git a/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala index 03187aee044e4..e2120eaee6e5a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LogQuery.scala @@ -41,7 +41,7 @@ object LogQuery { | 0 73.23.2.15 images.com 1358492557 - Whatup""".stripMargin.split('\n').mkString ) - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val sparkConf = new SparkConf().setAppName("Log Query") val sc = new SparkContext(sparkConf) diff --git a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala index e6f33b7adf5d1..4bea5cae775cb 100644 --- a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession * Usage: MultiBroadcastTest [partitions] [numElem] */ object MultiBroadcastTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder diff --git a/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala index 2332a661f26a0..2bd7c3e954396 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SimpleSkewedGroupByTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession * Usage: SimpleSkewedGroupByTest [numMappers] [numKVPairs] [valSize] [numReducers] [ratio] */ object SimpleSkewedGroupByTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("SimpleSkewedGroupByTest") diff --git a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala index 4d3c34041bc17..2e7abd62dcdc6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession * Usage: GroupByTest [numMappers] [numKVPairs] [KeySize] [numReducers] */ object SkewedGroupByTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("GroupBy Test") diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 
d3e7b7a967de7..651f0224d4402 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -78,7 +78,7 @@ object SparkALS { new CholeskyDecomposition(XtX).getSolver.solve(Xty) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of ALS and is given as an example! |Please use org.apache.spark.ml.recommendation.ALS @@ -86,7 +86,7 @@ object SparkALS { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { var slices = 0 diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala index 23eaa879114a9..8c09ce614d931 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala @@ -49,7 +49,7 @@ object SparkHdfsLR { DataPoint(new DenseVector(x), y) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of Logistic Regression and is given as an example! |Please use org.apache.spark.ml.classification.LogisticRegression @@ -57,7 +57,7 @@ object SparkHdfsLR { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: SparkHdfsLR ") diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala index b005cb6971c16..ec9b44ce6e3b7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala @@ -49,7 +49,7 @@ object SparkKMeans { bestIndex } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use org.apache.spark.ml.clustering.KMeans @@ -57,7 +57,7 @@ object SparkKMeans { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: SparkKMeans ") diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala index 4b1497345af82..deb6668f7ecfc 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkLR.scala @@ -51,7 +51,7 @@ object SparkLR { Array.tabulate(N)(generatePoint) } - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of Logistic Regression and is given as an example! 
|Please use org.apache.spark.ml.classification.LogisticRegression @@ -59,7 +59,7 @@ object SparkLR { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { showWarning() diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala index 9299bad5d3290..3bd475c440d72 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.SparkSession */ object SparkPageRank { - def showWarning() { + def showWarning(): Unit = { System.err.println( """WARN: This is a naive implementation of PageRank and is given as an example! |Please use the PageRank implementation found in org.apache.spark.graphx.lib.PageRank @@ -47,7 +47,7 @@ object SparkPageRank { """.stripMargin) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: SparkPageRank ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala index 828d98b5001d7..a8eec6a99cf4b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPi.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.SparkSession /** Computes an approximation to pi */ object SparkPi { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("Spark Pi") diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala b/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala index 64076f2deb706..99a12b9442365 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkRemoteFileTest.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.SparkSession /** Usage: SparkRemoteFileTest [file] */ object SparkRemoteFileTest { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: SparkRemoteFileTest ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala index f5d42141f5dd2..7a6fa9a797ff9 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkTC.scala @@ -41,7 +41,7 @@ object SparkTC { edges.toSeq } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("SparkTC") diff --git a/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala index da3ffca1a6f2a..af18c0afbb223 100644 --- a/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala +++ b/examples/src/main/scala/org/apache/spark/examples/graphx/LiveJournalPageRank.scala @@ -23,7 +23,7 @@ package org.apache.spark.examples.graphx * http://snap.stanford.edu/data/soc-LiveJournal1.html. 
*/ object LiveJournalPageRank { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println( "Usage: LiveJournalPageRank \n" + diff --git a/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala b/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala index 57b2edf992208..8bc9c0a86eab6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala +++ b/examples/src/main/scala/org/apache/spark/examples/graphx/SynthBenchmark.scala @@ -47,7 +47,7 @@ object SynthBenchmark { * -degFile the local file to save the degree information (Default: Empty) * -seed seed to use for RNGs (Default: -1, picks seed randomly) */ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val options = args.map { arg => arg.dropWhile(_ == '-').split('=') match { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala index 8091838a2301e..354e65c2bae38 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala @@ -42,7 +42,7 @@ object ALSExample { } // $example off$ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("ALSExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala index 5638e66b8792a..1a67a6e755ab4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ChiSqSelectorExample.scala @@ -25,7 +25,7 @@ import org.apache.spark.ml.linalg.Vectors import org.apache.spark.sql.SparkSession object ChiSqSelectorExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("ChiSqSelectorExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala index 91d861dd4380a..947ca5f5fb5e1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/CountVectorizerExample.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel} import org.apache.spark.sql.SparkSession object CountVectorizerExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("CountVectorizerExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala index ee4469faab3a0..4377efd9e95fa 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala @@ -41,7 +41,7 @@ object DataFrameExample { case class Params(input: String = "data/mllib/sample_libsvm_data.txt") extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("DataFrameExample") { diff --git 
a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index 19f2d7751bc54..ef38163d7eb0d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -65,7 +65,7 @@ object DecisionTreeExample { checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("DecisionTreeExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala index 2dc11b07d88ef..9b5dfed0cb31b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala @@ -36,7 +36,7 @@ import org.apache.spark.sql.{Dataset, Row, SparkSession} */ object DeveloperApiExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("DeveloperApiExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala index 8f3ce4b315bd3..ca4235d53e636 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/GBTExample.scala @@ -63,7 +63,7 @@ object GBTExample { checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("GBTExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala index 2940682c32801..b3642c0b45db6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/IndexToStringExample.scala @@ -25,7 +25,7 @@ import org.apache.spark.ml.feature.{IndexToString, StringIndexer} import org.apache.spark.sql.SparkSession object IndexToStringExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("IndexToStringExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala index 6903a1c298ced..370c6fd7c17fc 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala @@ -50,7 +50,7 @@ object LinearRegressionExample { tol: Double = 1E-6, fracTest: Double = 0.2) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("LinearRegressionExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala index 
bd6cc8cff2348..b64ab4792add4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/LogisticRegressionExample.scala @@ -55,7 +55,7 @@ object LogisticRegressionExample { tol: Double = 1E-6, fracTest: Double = 0.2) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("LogisticRegressionExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala index 4ad6c7c3ef202..86e70e8ab0189 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneVsRestExample.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.SparkSession */ object OneVsRestExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName(s"OneVsRestExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala index 0fe16fb6dfa9f..55823fe1832e5 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/QuantileDiscretizerExample.scala @@ -23,7 +23,7 @@ import org.apache.spark.ml.feature.QuantileDiscretizer import org.apache.spark.sql.SparkSession object QuantileDiscretizerExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("QuantileDiscretizerExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala index 3c127a46e1f10..6ba14bcd1822f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/RandomForestExample.scala @@ -64,7 +64,7 @@ object RandomForestExample { checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("RandomForestExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala index bb4587b82cb37..bf6a4846b6e34 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/SQLTransformerExample.scala @@ -24,7 +24,7 @@ import org.apache.spark.ml.feature.SQLTransformer import org.apache.spark.sql.SparkSession object SQLTransformerExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("SQLTransformerExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala index ec2df2ef876ba..6121c81cd1f5d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala +++ 
b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.SparkSession object TfIdfExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("TfIdfExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala index b4179ecc1e56d..05f2ee3288624 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/UnaryTransformerExample.scala @@ -82,7 +82,7 @@ object UnaryTransformerExample { object MyTransformer extends DefaultParamsReadable[MyTransformer] // $example off$ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder() .appName("UnaryTransformerExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala index 4bcc6ac6a01f5..8ff0e8c6a51c8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/Word2VecExample.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.SparkSession object Word2VecExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("Word2Vec example") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala index a07535bb5a38d..1a7839414b38e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/AssociationRulesExample.scala @@ -26,7 +26,7 @@ import org.apache.spark.mllib.fpm.FPGrowth.FreqItemset object AssociationRulesExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("AssociationRulesExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala index e3cc1d9c83361..6fc3501fc57b5 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala @@ -58,7 +58,7 @@ object BinaryClassification { regType: RegType = L2, regParam: Double = 0.01) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("BinaryClassification") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala index 53d0b8fc208ef..b7f0ba00f913e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BisectingKMeansExample.scala @@ -34,7 +34,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors} */ object BisectingKMeansExample { - def main(args: Array[String]) { + def main(args: 
Array[String]): Unit = { val sparkConf = new SparkConf().setAppName("mllib.BisectingKMeansExample") val sc = new SparkContext(sparkConf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala index 0b44c339ef139..cf9f7adbf6999 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/Correlations.scala @@ -37,7 +37,7 @@ object Correlations { case class Params(input: String = "data/mllib/sample_linear_regression_data.txt") extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala index 681465d2176d4..9082f0b5a8b85 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/CosineSimilarity.scala @@ -45,7 +45,7 @@ object CosineSimilarity { case class Params(inputFile: String = null, threshold: Double = 0.1) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("CosineSimilarity") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala index b5d1b02f92524..1029ca04c348f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DecisionTreeRunner.scala @@ -67,7 +67,7 @@ object DecisionTreeRunner { checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("DecisionTreeRunner") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala index b228827e5886f..0259df2799174 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala @@ -47,7 +47,7 @@ object DenseKMeans { numIterations: Int = 10, initializationMode: InitializationMode = Parallel) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("DenseKMeans") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala index f724ee1030f04..a25ce826ee842 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/FPGrowthExample.scala @@ -35,7 +35,7 @@ object FPGrowthExample { minSupport: Double = 0.3, numPartition: Int = -1) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("FPGrowthExample") { diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala index b1b3a79d87ae1..103d212a80e78 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GaussianMixtureExample.scala @@ -26,7 +26,7 @@ import org.apache.spark.mllib.linalg.Vectors object GaussianMixtureExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("GaussianMixtureExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala index 3f264933cd3cc..12e0c8df274b2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/GradientBoostedTreesRunner.scala @@ -50,7 +50,7 @@ object GradientBoostedTreesRunner { numIterations: Int = 10, fracTest: Double = 0.2) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("GradientBoostedTrees") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala index 9b3c3266ee30a..8435209377553 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/HypothesisTestingExample.scala @@ -29,7 +29,7 @@ import org.apache.spark.rdd.RDD object HypothesisTestingExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("HypothesisTestingExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala index b0a6f1671a898..17ebd4159b8d7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/KMeansExample.scala @@ -26,7 +26,7 @@ import org.apache.spark.mllib.linalg.Vectors object KMeansExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("KMeansExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index cd77ecf990b3b..605ca68e627ec 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -53,7 +53,7 @@ object LDAExample { checkpointDir: Option[String] = None, checkpointInterval: Int = 10) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("LDAExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala 
index d25962c5500ed..55a45b302b5a3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LatentDirichletAllocationExample.scala @@ -26,7 +26,7 @@ import org.apache.spark.mllib.linalg.Vectors object LatentDirichletAllocationExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("LatentDirichletAllocationExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala deleted file mode 100644 index 03222b13ad27d..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegression.scala +++ /dev/null @@ -1,138 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// scalastyle:off println -package org.apache.spark.examples.mllib - -import org.apache.log4j.{Level, Logger} -import scopt.OptionParser - -import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.mllib.optimization.{L1Updater, SimpleUpdater, SquaredL2Updater} -import org.apache.spark.mllib.regression.LinearRegressionWithSGD -import org.apache.spark.mllib.util.MLUtils - -/** - * An example app for linear regression. Run with - * {{{ - * bin/run-example org.apache.spark.examples.mllib.LinearRegression - * }}} - * A synthetic dataset can be found at `data/mllib/sample_linear_regression_data.txt`. - * If you use it as a template to create your own app, please use `spark-submit` to submit your app. 
- */ -@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") -object LinearRegression { - - object RegType extends Enumeration { - type RegType = Value - val NONE, L1, L2 = Value - } - - import RegType._ - - case class Params( - input: String = null, - numIterations: Int = 100, - stepSize: Double = 1.0, - regType: RegType = L2, - regParam: Double = 0.01) extends AbstractParams[Params] - - def main(args: Array[String]) { - val defaultParams = Params() - - val parser = new OptionParser[Params]("LinearRegression") { - head("LinearRegression: an example app for linear regression.") - opt[Int]("numIterations") - .text("number of iterations") - .action((x, c) => c.copy(numIterations = x)) - opt[Double]("stepSize") - .text(s"initial step size, default: ${defaultParams.stepSize}") - .action((x, c) => c.copy(stepSize = x)) - opt[String]("regType") - .text(s"regularization type (${RegType.values.mkString(",")}), " + - s"default: ${defaultParams.regType}") - .action((x, c) => c.copy(regType = RegType.withName(x))) - opt[Double]("regParam") - .text(s"regularization parameter, default: ${defaultParams.regParam}") - arg[String]("") - .required() - .text("input paths to labeled examples in LIBSVM format") - .action((x, c) => c.copy(input = x)) - note( - """ - |For example, the following command runs this app on a synthetic dataset: - | - | bin/spark-submit --class org.apache.spark.examples.mllib.LinearRegression \ - | examples/target/scala-*/spark-examples-*.jar \ - | data/mllib/sample_linear_regression_data.txt - """.stripMargin) - } - - parser.parse(args, defaultParams) match { - case Some(params) => run(params) - case _ => sys.exit(1) - } - } - - def run(params: Params): Unit = { - val conf = new SparkConf().setAppName(s"LinearRegression with $params") - val sc = new SparkContext(conf) - - Logger.getRootLogger.setLevel(Level.WARN) - - val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() - - val splits = examples.randomSplit(Array(0.8, 0.2)) - val training = splits(0).cache() - val test = splits(1).cache() - - val numTraining = training.count() - val numTest = test.count() - println(s"Training: $numTraining, test: $numTest.") - - examples.unpersist() - - val updater = params.regType match { - case NONE => new SimpleUpdater() - case L1 => new L1Updater() - case L2 => new SquaredL2Updater() - } - - val algorithm = new LinearRegressionWithSGD() - algorithm.optimizer - .setNumIterations(params.numIterations) - .setStepSize(params.stepSize) - .setUpdater(updater) - .setRegParam(params.regParam) - - val model = algorithm.run(training) - - val prediction = model.predict(test.map(_.features)) - val predictionAndLabel = prediction.zip(test.map(_.label)) - - val loss = predictionAndLabel.map { case (p, l) => - val err = p - l - err * err - }.reduce(_ + _) - val rmse = math.sqrt(loss / numTest) - - println(s"Test RMSE = $rmse.") - - sc.stop() - } -} -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala deleted file mode 100644 index 449b725d1d173..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LinearRegressionWithSGDExample.scala +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// scalastyle:off println -package org.apache.spark.examples.mllib - -import org.apache.spark.{SparkConf, SparkContext} -// $example on$ -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.regression.LabeledPoint -import org.apache.spark.mllib.regression.LinearRegressionModel -import org.apache.spark.mllib.regression.LinearRegressionWithSGD -// $example off$ - -@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") -object LinearRegressionWithSGDExample { - - def main(args: Array[String]): Unit = { - val conf = new SparkConf().setAppName("LinearRegressionWithSGDExample") - val sc = new SparkContext(conf) - - // $example on$ - // Load and parse the data - val data = sc.textFile("data/mllib/ridge-data/lpsa.data") - val parsedData = data.map { line => - val parts = line.split(',') - LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) - }.cache() - - // Building the model - val numIterations = 100 - val stepSize = 0.00000001 - val model = LinearRegressionWithSGD.train(parsedData, numIterations, stepSize) - - // Evaluate model on training examples and compute training error - val valuesAndPreds = parsedData.map { point => - val prediction = model.predict(point.features) - (point.label, prediction) - } - val MSE = valuesAndPreds.map{ case(v, p) => math.pow((v - p), 2) }.mean() - println(s"training Mean Squared Error $MSE") - - // Save and load model - model.save(sc, "target/tmp/scalaLinearRegressionWithSGDModel") - val sameModel = LinearRegressionModel.load(sc, "target/tmp/scalaLinearRegressionWithSGDModel") - // $example off$ - - sc.stop() - } -} -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index fd810155d6a88..92c85c9271a5a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala @@ -48,7 +48,7 @@ object MovieLensALS { numProductBlocks: Int = -1, implicitPrefs: Boolean = false) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("MovieLensALS") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala index f9e47e485e72f..b5c52f9a31224 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MultivariateSummarizer.scala @@ -38,7 +38,7 @@ object MultivariateSummarizer { case class Params(input: String = "data/mllib/sample_linear_regression_data.txt") extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: 
Array[String]): Unit = { val defaultParams = Params() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala deleted file mode 100644 index eff2393cc3abe..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PCAExample.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// scalastyle:off println -package org.apache.spark.examples.mllib - -import org.apache.spark.SparkConf -import org.apache.spark.SparkContext -// $example on$ -import org.apache.spark.mllib.feature.PCA -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} -// $example off$ - -@deprecated("Deprecated since LinearRegressionWithSGD is deprecated. Use ml.feature.PCA", "2.0.0") -object PCAExample { - - def main(args: Array[String]): Unit = { - - val conf = new SparkConf().setAppName("PCAExample") - val sc = new SparkContext(conf) - - // $example on$ - val data = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line => - val parts = line.split(',') - LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) - }.cache() - - val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) - val training = splits(0).cache() - val test = splits(1) - - val pca = new PCA(training.first().features.size / 2).fit(data.map(_.features)) - val training_pca = training.map(p => p.copy(features = pca.transform(p.features))) - val test_pca = test.map(p => p.copy(features = pca.transform(p.features))) - - val numIterations = 100 - val model = LinearRegressionWithSGD.train(training, numIterations) - val model_pca = LinearRegressionWithSGD.train(training_pca, numIterations) - - val valuesAndPreds = test.map { point => - val score = model.predict(point.features) - (score, point.label) - } - - val valuesAndPreds_pca = test_pca.map { point => - val score = model_pca.predict(point.features) - (score, point.label) - } - - val MSE = valuesAndPreds.map { case (v, p) => math.pow((v - p), 2) }.mean() - val MSE_pca = valuesAndPreds_pca.map { case (v, p) => math.pow((v - p), 2) }.mean() - - println(s"Mean Squared Error = $MSE") - println(s"PCA Mean Squared Error = $MSE_pca") - // $example off$ - - sc.stop() - } -} -// scalastyle:on println diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala index 65603252c4384..eaf1dacd0160a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala +++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala @@ -62,7 +62,7 @@ object PowerIterationClusteringExample { maxIterations: Int = 15 ) extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("PowerIterationClusteringExample") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala index 8b789277774af..1b5d919a047e8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/PrefixSpanExample.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.fpm.PrefixSpan object PrefixSpanExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("PrefixSpanExample") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala index 7ccbb5a0640cd..aee12a1b4751f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RandomRDDGeneration.scala @@ -31,7 +31,7 @@ import org.apache.spark.rdd.RDD */ object RandomRDDGeneration { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName(s"RandomRDDGeneration") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index ea13ec05e2fad..2845028dd0814 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.recommendation.{ALS, Rating} import org.apache.spark.sql.SparkSession object RankingMetricsExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder .appName("RankingMetricsExample") diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala deleted file mode 100644 index 76cfb804e18f3..0000000000000 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RegressionMetricsExample.scala +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -// scalastyle:off println - -package org.apache.spark.examples.mllib - -// $example on$ -import org.apache.spark.mllib.evaluation.RegressionMetrics -import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD} -// $example off$ -import org.apache.spark.sql.SparkSession - -@deprecated("Use ml.regression.LinearRegression and the resulting model summary for metrics", - "2.0.0") -object RegressionMetricsExample { - def main(args: Array[String]): Unit = { - val spark = SparkSession - .builder - .appName("RegressionMetricsExample") - .getOrCreate() - // $example on$ - // Load the data - val data = spark - .read.format("libsvm").load("data/mllib/sample_linear_regression_data.txt") - .rdd.map(row => LabeledPoint(row.getDouble(0), row.get(1).asInstanceOf[Vector])) - .cache() - - // Build the model - val numIterations = 100 - val model = LinearRegressionWithSGD.train(data, numIterations) - - // Get predictions - val valuesAndPreds = data.map{ point => - val prediction = model.predict(point.features) - (prediction, point.label) - } - - // Instantiate metrics object - val metrics = new RegressionMetrics(valuesAndPreds) - - // Squared error - println(s"MSE = ${metrics.meanSquaredError}") - println(s"RMSE = ${metrics.rootMeanSquaredError}") - - // R-squared - println(s"R-squared = ${metrics.r2}") - - // Mean absolute error - println(s"MAE = ${metrics.meanAbsoluteError}") - - // Explained variance - println(s"Explained variance = ${metrics.explainedVariance}") - // $example off$ - - spark.stop() - } -} -// scalastyle:on println - diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala index ba3deae5d688f..fdde47d60c544 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala @@ -35,7 +35,7 @@ object SampledRDDs { case class Params(input: String = "data/mllib/sample_binary_classification_data.txt") extends AbstractParams[Params] - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("SampledRDDs") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala index 694c3bb18b045..ba16e8f5ff347 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SimpleFPGrowth.scala @@ -26,7 +26,7 @@ import org.apache.spark.rdd.RDD object SimpleFPGrowth { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val conf = new SparkConf().setAppName("SimpleFPGrowth") val sc = new SparkContext(conf) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala index b76add2f9bc99..b501f4db2efbb 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala @@ -40,7 +40,7 @@ object SparseNaiveBayes { numFeatures: Int = -1, lambda: Double = 1.0) extends AbstractParams[Params] - def main(args: Array[String]) { 
+ def main(args: Array[String]): Unit = { val defaultParams = Params() val parser = new OptionParser[Params]("SparseNaiveBayes") { diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala index 7888af79f87f4..5186f599d9628 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingKMeansExample.scala @@ -52,7 +52,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} */ object StreamingKMeansExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 5) { System.err.println( "Usage: StreamingKMeansExample " + diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala index a8b144a197229..4c72f444ff9ec 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingLogisticRegression.scala @@ -46,7 +46,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} */ object StreamingLogisticRegression { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 4) { System.err.println( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala index ae4dee24c6474..f60b10a02274b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/StreamingTestExample.scala @@ -44,7 +44,7 @@ import org.apache.spark.util.Utils */ object StreamingTestExample { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 3) { // scalastyle:off println System.err.println( diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala index 071d341b81614..6b839f3f4ac1e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnyPCA.scala @@ -35,7 +35,7 @@ import org.apache.spark.mllib.linalg.distributed.RowMatrix * represents a 3-by-2 matrix, whose first row is (0.5, 1.0). */ object TallSkinnyPCA { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 1) { System.err.println("Usage: TallSkinnyPCA ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala index 8ae6de16d80e7..8874c2eda3d2e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/TallSkinnySVD.scala @@ -35,7 +35,7 @@ import org.apache.spark.mllib.linalg.distributed.RowMatrix * represents a 3-by-2 matrix, whose first row is (0.5, 1.0). 
*/ object TallSkinnySVD { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 1) { System.err.println("Usage: TallSkinnySVD ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index deaa9f252b9b0..4fd482d5b8bf7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.SparkSession case class Record(key: Int, value: String) object RDDRelation { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // $example on:init_session$ val spark = SparkSession .builder diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala index c7b6a50f0ae7c..d4c05e5ad9944 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala @@ -24,7 +24,7 @@ object SQLDataSourceExample { case class Person(name: String, age: Long) - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val spark = SparkSession .builder() .appName("Spark SQL data sources example") diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala index 678cbc64aff1f..fde281087c267 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SparkSQLExample.scala @@ -34,7 +34,7 @@ object SparkSQLExample { case class Person(name: String, age: Long) // $example off:create_ds$ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // $example on:init_session$ val spark = SparkSession .builder() diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala index a832276602b88..3be8a3862f39c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/SparkHiveExample.scala @@ -28,7 +28,7 @@ object SparkHiveExample { case class Record(key: Int, value: String) // $example off:spark_hive$ - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // When working with Hive, one must instantiate `SparkSession` with Hive support, including // connectivity to a persistent Hive metastore, support for Hive serdes, and Hive user-defined // functions. Users who do not have an existing Hive deployment can still enable Hive support. 
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala index de477c5ce8161..6dbc70bd141f3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCount.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.SparkSession * localhost 9999` */ object StructuredNetworkWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: StructuredNetworkWordCount ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala b/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala index b4dad21dd75b0..4ba2c6bc68918 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/streaming/StructuredNetworkWordCountWindowed.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.functions._ */ object StructuredNetworkWordCountWindowed { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: StructuredNetworkWordCountWindowed " + " []") diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala index fc3f8fa53c7ae..0f47deaf1021b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala @@ -37,7 +37,7 @@ import org.apache.spark.streaming.receiver.Receiver * `$ bin/run-example org.apache.spark.examples.streaming.CustomReceiver localhost 9999` */ object CustomReceiver { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: CustomReceiver ") System.exit(1) @@ -64,20 +64,20 @@ object CustomReceiver { class CustomReceiver(host: String, port: Int) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) { - def onStart() { + def onStart(): Unit = { // Start the thread that receives data over a connection new Thread("Socket Receiver") { - override def run() { receive() } + override def run(): Unit = { receive() } }.start() } - def onStop() { + def onStop(): Unit = { // There is nothing much to do as the thread calling receive() // is designed to stop by itself isStopped() returns false } /** Create a socket connection and receive data until receiver is stopped */ - private def receive() { + private def receive(): Unit = { var socket: Socket = null var userInput: String = null try { diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala index 3024b59480099..6fdb37194ea7d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala @@ -37,7 +37,7 @@ import org.apache.spark.streaming.kafka010._ * consumer-group topic1,topic2 */ object DirectKafkaWordCount { - def main(args: 
Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println(s""" |Usage: DirectKafkaWordCount diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala index b68a59873a8fe..6a35ce9b2a293 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKerberizedKafkaWordCount.scala @@ -76,7 +76,7 @@ import org.apache.spark.streaming.kafka010._ * using SASL_SSL in production. */ object DirectKerberizedKafkaWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println(s""" |Usage: DirectKerberizedKafkaWordCount diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala index 1f282d437dc38..19dc7a3cce0ac 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/HdfsWordCount.scala @@ -33,7 +33,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} * Then create a text file in `localdir` and the words in the file will get counted. */ object HdfsWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 1) { System.err.println("Usage: HdfsWordCount ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala index 15b57fccb4076..26bb51dde3a1d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/NetworkWordCount.scala @@ -34,7 +34,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} * `$ bin/run-example org.apache.spark.examples.streaming.NetworkWordCount localhost 9999` */ object NetworkWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: NetworkWordCount ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala index 19bacd449787b..09eeaf9fa4496 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/QueueStream.scala @@ -25,7 +25,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} object QueueStream { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { StreamingExamples.setStreamingLogLevels() val sparkConf = new SparkConf().setAppName("QueueStream") diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala index 437ccf0898d7c..a20abd6e9d12e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/RawNetworkGrep.scala @@ -37,7 +37,7 @@ import org.apache.spark.util.IntParam * is the Spark Streaming batch duration in milliseconds. 
*/ object RawNetworkGrep { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 4) { System.err.println("Usage: RawNetworkGrep ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala index f018f3a26d2e9..243c22e71275c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/RecoverableNetworkWordCount.scala @@ -139,7 +139,7 @@ object RecoverableNetworkWordCount { ssc } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 4) { System.err.println(s"Your arguments were ${args.mkString("[", ", ", "]")}") System.err.println( diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala index 787bbec73b28f..778be7baaeeac 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala @@ -38,7 +38,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext, Time} */ object SqlNetworkWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: NetworkWordCount ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala index 2811e67009fb0..46f01edf7deec 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StatefulNetworkWordCount.scala @@ -35,7 +35,7 @@ import org.apache.spark.streaming._ * org.apache.spark.examples.streaming.StatefulNetworkWordCount localhost 9999` */ object StatefulNetworkWordCount { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: StatefulNetworkWordCount ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index b00f32fb25243..073f9728c68af 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -25,7 +25,7 @@ import org.apache.spark.internal.Logging object StreamingExamples extends Logging { /** Set reasonable logging levels for streaming if the user has not configured log4j. 
*/ - def setStreamingLogLevels() { + def setStreamingLogLevels(): Unit = { val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements if (!log4jInitialized) { // We first log something to initialize Spark's default logging, then we override the diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala index 2108bc63edea2..7234f30e7d267 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala @@ -81,7 +81,7 @@ object PageViewGenerator { new PageView(page, status, zipCode, id).toString() } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 2) { System.err.println("Usage: PageViewGenerator ") System.exit(1) diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala index b8e7c7e9e9152..b51bfacabf4aa 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala @@ -35,7 +35,7 @@ import org.apache.spark.streaming.{Seconds, StreamingContext} */ // scalastyle:on object PageViewStream { - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 3) { System.err.println("Usage: PageViewStream ") System.err.println(" must be one of pageCounts, slidingPageCounts," + diff --git a/external/avro/benchmarks/AvroReadBenchmark-jdk11-results.txt b/external/avro/benchmarks/AvroReadBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..94137a691e4aa --- /dev/null +++ b/external/avro/benchmarks/AvroReadBenchmark-jdk11-results.txt @@ -0,0 +1,122 @@ +================================================================================================ +SQL Single Numeric Column Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2995 3081 121 5.3 190.4 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2865 2881 23 5.5 182.2 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2919 2936 23 5.4 185.6 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single BIGINT Column Scan: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 3148 3262 161 5.0 200.1 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2651 2721 99 5.9 168.5 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2782 2854 103 5.7 176.9 1.0X + + +================================================================================================ +Int and String Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of columns 4531 4583 73 2.3 432.1 1.0X + + +================================================================================================ +Partitioned Table Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column 3084 3105 30 5.1 196.1 1.0X +Partition column 3143 3164 30 5.0 199.8 1.0X +Both columns 3272 3339 94 4.8 208.1 0.9X + + +================================================================================================ +Repeated String Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3249 3318 98 3.2 309.8 1.0X + + +================================================================================================ +String with Nulls Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 5308 5335 38 2.0 506.2 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 4405 4429 33 2.4 420.1 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3256 3309 75 3.2 310.5 1.0X + + +================================================================================================ +Single Column Scan From Wide Columns +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 5230 5290 85 0.2 4987.4 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 10206 10329 174 0.1 9733.1 1.0X + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 15333 15365 46 0.1 14622.3 1.0X + + diff --git a/external/avro/benchmarks/AvroReadBenchmark-results.txt b/external/avro/benchmarks/AvroReadBenchmark-results.txt index 7900fea453b10..7b008a312c320 100644 --- a/external/avro/benchmarks/AvroReadBenchmark-results.txt +++ b/external/avro/benchmarks/AvroReadBenchmark-results.txt @@ -2,121 +2,121 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2774 / 2815 5.7 176.4 1.0X +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 3067 3132 91 5.1 195.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -Sum 2761 / 2777 5.7 175.5 1.0X +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2927 2929 3 5.4 186.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2783 / 2870 5.7 176.9 1.0X +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2928 2990 87 5.4 186.2 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 3256 / 3266 4.8 207.0 1.0X +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 3374 3447 104 4.7 214.5 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2841 / 2867 5.5 180.6 1.0X +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 2896 2901 7 5.4 184.1 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum 2981 / 2996 5.3 189.5 1.0X +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum 3004 3006 3 5.2 191.0 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -Sum of columns 4781 / 4783 2.2 456.0 1.0X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of columns 4814 4830 22 2.2 459.1 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column 3372 / 3386 4.7 214.4 1.0X -Partition column 3035 / 3064 5.2 193.0 1.1X -Both columns 3445 / 3461 4.6 219.1 1.0X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column 3361 3362 1 4.7 213.7 1.0X +Partition column 2999 3013 20 5.2 190.7 1.1X +Both columns 3613 3615 2 4.4 229.7 0.9X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 3395 / 3401 3.1 323.8 1.0X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3415 3416 1 3.1 325.7 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 5580 / 5624 1.9 532.2 1.0X +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 5535 5536 2 1.9 527.8 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -Sum of string length 4622 / 4623 2.3 440.8 1.0X +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 4567 4575 11 2.3 435.6 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of string length 3238 / 3241 3.2 308.8 1.0X +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of string length 3248 3268 29 3.2 309.7 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 5472 / 5484 0.2 5218.8 1.0X +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 5486 5497 15 0.2 5232.0 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 10680 / 10701 0.1 10185.1 1.0X +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 10682 10746 90 0.1 10186.8 1.0X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Sum of single column 16143 / 16238 0.1 15394.9 1.0X +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Sum of single column 16177 16177 0 0.1 15427.7 1.0X diff --git a/external/avro/benchmarks/AvroWriteBenchmark-jdk11-results.txt 
b/external/avro/benchmarks/AvroWriteBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..2cf1835013821 --- /dev/null +++ b/external/avro/benchmarks/AvroWriteBenchmark-jdk11-results.txt @@ -0,0 +1,10 @@ +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Output Single Int Column 3026 3142 164 5.2 192.4 1.0X +Output Single Double Column 3157 3260 145 5.0 200.7 1.0X +Output Int and String Column 6123 6190 94 2.6 389.3 0.5X +Output Partitions 5197 5733 758 3.0 330.4 0.6X +Output Buckets 7074 7285 298 2.2 449.7 0.4X + diff --git a/external/avro/benchmarks/AvroWriteBenchmark-results.txt b/external/avro/benchmarks/AvroWriteBenchmark-results.txt index fb2a77333eec5..20f6ae9099a4d 100644 --- a/external/avro/benchmarks/AvroWriteBenchmark-results.txt +++ b/external/avro/benchmarks/AvroWriteBenchmark-results.txt @@ -1,10 +1,10 @@ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Avro writer benchmark: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Output Single Int Column 3213 / 3373 4.9 204.3 1.0X -Output Single Double Column 3313 / 3345 4.7 210.7 1.0X -Output Int and String Column 7303 / 7316 2.2 464.3 0.4X -Output Partitions 5309 / 5691 3.0 337.5 0.6X -Output Buckets 7031 / 7557 2.2 447.0 0.5X +Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Output Single Int Column 3080 3137 82 5.1 195.8 1.0X +Output Single Double Column 3595 3595 0 4.4 228.6 0.9X +Output Int and String Column 7491 7504 18 2.1 476.3 0.4X +Output Partitions 5518 5663 205 2.9 350.8 0.6X +Output Buckets 7467 7581 161 2.1 474.7 0.4X diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala index 3171f1e08b4fc..c6f52d676422c 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.v2.avro import org.apache.spark.sql.avro.AvroFileFormat +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 -import org.apache.spark.sql.sources.v2.Table import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala index 243af7da47003..0397d15aed924 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala @@ -31,10 +31,10 @@ import org.apache.spark.broadcast.Broadcast import 
org.apache.spark.internal.Logging import org.apache.spark.sql.avro.{AvroDeserializer, AvroOptions} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.read.PartitionReader import org.apache.spark.sql.execution.datasources.PartitionedFile import org.apache.spark.sql.execution.datasources.v2.{EmptyPartitionReader, FilePartitionReaderFactory, PartitionReaderWithPartitionValues} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.v2.reader.PartitionReader import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala index 6ec351080a118..e1268ac2ce581 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala @@ -21,9 +21,9 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.PartitionReaderFactory import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScan -import org.apache.spark.sql.sources.v2.reader.PartitionReaderFactory import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.SerializableConfiguration diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala index 815da2bd92d44..e36c71ef4b1f7 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.v2.avro import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder -import org.apache.spark.sql.sources.v2.reader.Scan import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala index a781624aa61aa..765e5727d944a 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala @@ -22,9 +22,9 @@ import org.apache.hadoop.fs.FileStatus import org.apache.spark.sql.SparkSession import org.apache.spark.sql.avro.AvroUtils +import org.apache.spark.sql.connector.write.WriteBuilder import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.datasources.v2.FileTable -import org.apache.spark.sql.sources.v2.writer.WriteBuilder import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index cf88981b1efbd..dc60cfe41ca7a 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ 
b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -1036,7 +1036,7 @@ abstract class AvroSuite extends QueryTest with SharedSparkSession { (TimestampType, LONG), (DecimalType(4, 2), BYTES) ) - def assertException(f: () => AvroSerializer) { + def assertException(f: () => AvroSerializer): Unit = { val message = intercept[org.apache.spark.sql.avro.IncompatibleSchemaException] { f() }.getMessage diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala index f2f7d650066fb..a16126ae24246 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala @@ -22,7 +22,6 @@ import scala.util.Random import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.DataFrame -import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.types._ /** @@ -36,7 +35,7 @@ import org.apache.spark.sql.types._ * Results will be written to "benchmarks/AvroReadBenchmark-results.txt". * }}} */ -object AvroReadBenchmark extends SqlBasedBenchmark with SQLHelper { +object AvroReadBenchmark extends SqlBasedBenchmark { def withTempTable(tableNames: String*)(f: => Unit): Unit = { try f finally tableNames.foreach(spark.catalog.dropTempView) } diff --git a/external/docker/spark-test/base/Dockerfile b/external/docker/spark-test/base/Dockerfile index c1fd630d0b665..5bec5d3f16548 100644 --- a/external/docker/spark-test/base/Dockerfile +++ b/external/docker/spark-test/base/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && \ apt-get install -y less openjdk-8-jre-headless iproute2 vim-tiny sudo openssh-server && \ rm -rf /var/lib/apt/lists/* -ENV SCALA_VERSION 2.12.8 +ENV SCALA_VERSION 2.12.10 ENV CDH_VERSION cdh4 ENV SCALA_HOME /opt/scala-$SCALA_VERSION ENV SPARK_HOME /opt/spark diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 0735f0a7b937f..693820da6af6b 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -46,6 +46,13 @@ ${project.version} provided + + org.apache.spark + spark-token-provider-kafka-0-10_${scala.binary.version} + ${project.version} + test-jar + test + org.apache.spark spark-core_${scala.binary.version} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/JsonUtils.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/JsonUtils.scala index 868edb5dcdc0c..6dd5af2389a81 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/JsonUtils.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/JsonUtils.scala @@ -68,7 +68,7 @@ private object JsonUtils { partOffsets.map { case (part, offset) => new TopicPartition(topic, part) -> offset } - }.toMap + } } catch { case NonFatal(x) => throw new IllegalArgumentException( @@ -76,12 +76,27 @@ private object JsonUtils { } } + def partitionTimestamps(str: String): Map[TopicPartition, Long] = { + try { + Serialization.read[Map[String, Map[Int, Long]]](str).flatMap { case (topic, partTimestamps) => + partTimestamps.map { case (part, timestamp) => + new TopicPartition(topic, part) -> timestamp + } + } + } catch { + case NonFatal(x) => + throw new IllegalArgumentException( + s"""Expected e.g. 
{"topicA": {"0": 123456789, "1": 123456789}, + |"topicB": {"0": 123456789, "1": 123456789}}, got $str""".stripMargin) + } + } + /** * Write per-TopicPartition offsets as json string */ def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() - implicit val ordering = new Ordering[TopicPartition] { + implicit val order = new Ordering[TopicPartition] { override def compare(x: TopicPartition, y: TopicPartition): Int = { Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition)) } @@ -95,4 +110,9 @@ private object JsonUtils { } Serialization.write(result) } + + def partitionTimestamps(topicTimestamps: Map[TopicPartition, Long]): String = { + // For now it's same as partitionOffsets + partitionOffsets(topicTimestamps) + } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala index 700414167f3ef..3006770f306c0 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatch.scala @@ -23,8 +23,7 @@ import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.sources.v2.reader.{Batch, InputPartition, PartitionReaderFactory} - +import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory} private[kafka010] class KafkaBatch( strategy: ConsumerStrategy, @@ -32,7 +31,8 @@ private[kafka010] class KafkaBatch( specifiedKafkaParams: Map[String, String], failOnDataLoss: Boolean, startingOffsets: KafkaOffsetRangeLimit, - endingOffsets: KafkaOffsetRangeLimit) + endingOffsets: KafkaOffsetRangeLimit, + includeHeaders: Boolean) extends Batch with Logging { assert(startingOffsets != LatestOffsetRangeLimit, "Starting offset not allowed to be set to latest offsets.") @@ -59,8 +59,8 @@ private[kafka010] class KafkaBatch( // Leverage the KafkaReader to obtain the relevant partition offsets val (fromPartitionOffsets, untilPartitionOffsets) = { try { - (kafkaOffsetReader.fetchPartitionOffsets(startingOffsets), - kafkaOffsetReader.fetchPartitionOffsets(endingOffsets)) + (kafkaOffsetReader.fetchPartitionOffsets(startingOffsets, isStartingOffsets = true), + kafkaOffsetReader.fetchPartitionOffsets(endingOffsets, isStartingOffsets = false)) } finally { kafkaOffsetReader.close() } @@ -91,7 +91,7 @@ private[kafka010] class KafkaBatch( KafkaSourceProvider.kafkaParamsForExecutors(specifiedKafkaParams, uniqueGroupId) offsetRanges.map { range => new KafkaBatchInputPartition( - range, executorKafkaParams, pollTimeoutMs, failOnDataLoss) + range, executorKafkaParams, pollTimeoutMs, failOnDataLoss, includeHeaders) }.toArray } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala index 53b0b3c46854e..645b68b0c407a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala @@ -22,21 +22,21 @@ import java.{util => ju} import org.apache.spark.internal.Logging import 
org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.sources.v2.reader._ - +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} /** A [[InputPartition]] for reading Kafka data in a batch based streaming query. */ private[kafka010] case class KafkaBatchInputPartition( offsetRange: KafkaOffsetRange, executorKafkaParams: ju.Map[String, Object], pollTimeoutMs: Long, - failOnDataLoss: Boolean) extends InputPartition + failOnDataLoss: Boolean, + includeHeaders: Boolean) extends InputPartition private[kafka010] object KafkaBatchReaderFactory extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { val p = partition.asInstanceOf[KafkaBatchInputPartition] KafkaBatchPartitionReader(p.offsetRange, p.executorKafkaParams, p.pollTimeoutMs, - p.failOnDataLoss) + p.failOnDataLoss, p.includeHeaders) } } @@ -45,12 +45,14 @@ private case class KafkaBatchPartitionReader( offsetRange: KafkaOffsetRange, executorKafkaParams: ju.Map[String, Object], pollTimeoutMs: Long, - failOnDataLoss: Boolean) extends PartitionReader[InternalRow] with Logging { + failOnDataLoss: Boolean, + includeHeaders: Boolean) extends PartitionReader[InternalRow] with Logging { private val consumer = KafkaDataConsumer.acquire(offsetRange.topicPartition, executorKafkaParams) private val rangeToRead = resolveRange(offsetRange) - private val converter = new KafkaRecordToUnsafeRowConverter + private val unsafeRowProjector = new KafkaRecordToRowConverter() + .toUnsafeRowProjector(includeHeaders) private var nextOffset = rangeToRead.fromOffset private var nextRow: UnsafeRow = _ @@ -59,7 +61,7 @@ private case class KafkaBatchPartitionReader( if (nextOffset < rangeToRead.untilOffset) { val record = consumer.get(nextOffset, rangeToRead.untilOffset, pollTimeoutMs, failOnDataLoss) if (record != null) { - nextRow = converter.toUnsafeRow(record) + nextRow = unsafeRowProjector(record) nextOffset = record.offset + 1 true } else { diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchWrite.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchWrite.scala index 47ec07ae128d2..8e29e38b2a644 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchWrite.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchWrite.scala @@ -20,8 +20,8 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.write.{BatchWrite, DataWriter, DataWriterFactory, WriterCommitMessage} import org.apache.spark.sql.kafka010.KafkaWriter.validateQuery -import org.apache.spark.sql.sources.v2.writer._ import org.apache.spark.sql.types.StructType /** diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala index a9c1181a01c51..0603ae39ba622 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala @@ -27,9 +27,9 @@ import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import 
org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.kafka010.KafkaSourceProvider.{INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE, INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE} -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming._ +import org.apache.spark.sql.connector.read.InputPartition +import org.apache.spark.sql.connector.read.streaming.{ContinuousPartitionReader, ContinuousPartitionReaderFactory, ContinuousStream, Offset, PartitionOffset} +import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.util.CaseInsensitiveStringMap /** @@ -56,6 +56,7 @@ class KafkaContinuousStream( private[kafka010] val pollTimeoutMs = options.getLong(KafkaSourceProvider.CONSUMER_POLL_TIMEOUT, 512) + private val includeHeaders = options.getBoolean(INCLUDE_HEADERS, false) // Initialized when creating reader factories. If this diverges from the partitions at the latest // offsets, we need to reconfigure. @@ -68,6 +69,8 @@ class KafkaContinuousStream( case EarliestOffsetRangeLimit => KafkaSourceOffset(offsetReader.fetchEarliestOffsets()) case LatestOffsetRangeLimit => KafkaSourceOffset(offsetReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => offsetReader.fetchSpecificOffsets(p, reportDataLoss) + case SpecificTimestampRangeLimit(p) => offsetReader.fetchSpecificTimestampBasedOffsets(p, + failsOnNoMatchingOffset = true) } logInfo(s"Initial offsets: $offsets") offsets @@ -88,7 +91,7 @@ class KafkaContinuousStream( if (deletedPartitions.nonEmpty) { val message = if ( offsetReader.driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { - s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + s"$deletedPartitions are gone. ${CUSTOM_GROUP_ID_ERROR_MESSAGE}" } else { s"$deletedPartitions are gone. Some data may have been missed." } @@ -102,7 +105,7 @@ class KafkaContinuousStream( startOffsets.toSeq.map { case (topicPartition, start) => KafkaContinuousInputPartition( - topicPartition, start, kafkaParams, pollTimeoutMs, failOnDataLoss) + topicPartition, start, kafkaParams, pollTimeoutMs, failOnDataLoss, includeHeaders) }.toArray } @@ -153,19 +156,22 @@ class KafkaContinuousStream( * @param pollTimeoutMs The timeout for Kafka consumer polling. * @param failOnDataLoss Flag indicating whether data reader should fail if some offsets * are skipped. + * @param includeHeaders Flag indicating whether to include Kafka records' headers. 
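For context on the header support being threaded through these readers, a minimal usage sketch follows. It assumes an active SparkSession named spark, and that the INCLUDE_HEADERS key referenced above maps to the user-facing option name "includeHeaders" (the constant's value is not shown in this hunk); broker and topic names are placeholders.

    // Minimal sketch: enable Kafka record headers on a streaming read.
    val df = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "broker1:9092")   // placeholder broker
      .option("subscribe", "topicA")                        // placeholder topic
      .option("includeHeaders", "true")                     // assumed option name behind INCLUDE_HEADERS
      .load()

    // With headers enabled, each row gains a column matching KafkaRecordToRowConverter.headersType:
    //   headers: array<struct<key: string, value: binary>>
    val withHeaders = df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers")
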
*/ case class KafkaContinuousInputPartition( - topicPartition: TopicPartition, - startOffset: Long, - kafkaParams: ju.Map[String, Object], - pollTimeoutMs: Long, - failOnDataLoss: Boolean) extends InputPartition + topicPartition: TopicPartition, + startOffset: Long, + kafkaParams: ju.Map[String, Object], + pollTimeoutMs: Long, + failOnDataLoss: Boolean, + includeHeaders: Boolean) extends InputPartition object KafkaContinuousReaderFactory extends ContinuousPartitionReaderFactory { override def createReader(partition: InputPartition): ContinuousPartitionReader[InternalRow] = { val p = partition.asInstanceOf[KafkaContinuousInputPartition] new KafkaContinuousPartitionReader( - p.topicPartition, p.startOffset, p.kafkaParams, p.pollTimeoutMs, p.failOnDataLoss) + p.topicPartition, p.startOffset, p.kafkaParams, p.pollTimeoutMs, + p.failOnDataLoss, p.includeHeaders) } } @@ -184,9 +190,11 @@ class KafkaContinuousPartitionReader( startOffset: Long, kafkaParams: ju.Map[String, Object], pollTimeoutMs: Long, - failOnDataLoss: Boolean) extends ContinuousPartitionReader[InternalRow] { + failOnDataLoss: Boolean, + includeHeaders: Boolean) extends ContinuousPartitionReader[InternalRow] { private val consumer = KafkaDataConsumer.acquire(topicPartition, kafkaParams) - private val converter = new KafkaRecordToUnsafeRowConverter + private val unsafeRowProjector = new KafkaRecordToRowConverter() + .toUnsafeRowProjector(includeHeaders) private var nextKafkaOffset = startOffset private var currentRecord: ConsumerRecord[Array[Byte], Array[Byte]] = _ @@ -225,7 +233,7 @@ class KafkaContinuousPartitionReader( } override def get(): UnsafeRow = { - converter.toUnsafeRow(currentRecord) + unsafeRowProjector(currentRecord) } override def getOffset(): KafkaSourcePartitionOffset = { diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala index 87036beb9a252..ca82c908f441b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataConsumer.scala @@ -23,12 +23,13 @@ import java.util.concurrent.TimeoutException import scala.collection.JavaConverters._ +import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.clients.consumer.{ConsumerConfig, ConsumerRecord, KafkaConsumer, OffsetOutOfRangeException} import org.apache.kafka.common.TopicPartition import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.internal.Logging -import org.apache.spark.kafka010.KafkaConfigUpdater +import org.apache.spark.kafka010.{KafkaConfigUpdater, KafkaTokenClusterConf, KafkaTokenUtil} import org.apache.spark.sql.kafka010.KafkaDataConsumer.{AvailableOffsetRange, UNKNOWN_OFFSET} import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.util.{ShutdownHookManager, UninterruptibleThread} @@ -46,6 +47,13 @@ private[kafka010] class InternalKafkaConsumer( val groupId = kafkaParams.get(ConsumerConfig.GROUP_ID_CONFIG).asInstanceOf[String] + private[kafka010] val clusterConfig = KafkaTokenUtil.findMatchingTokenClusterConfig( + SparkEnv.get.conf, kafkaParams.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) + .asInstanceOf[String]) + + // Kafka consumer is not able to give back the params instantiated with so we need to store it. + // It must be updated whenever a new consumer is created. 
+ private[kafka010] var kafkaParamsWithSecurity: ju.Map[String, Object] = _ private val consumer = createConsumer() /** @@ -106,10 +114,10 @@ private[kafka010] class InternalKafkaConsumer( /** Create a KafkaConsumer to fetch records for `topicPartition` */ private def createConsumer(): KafkaConsumer[Array[Byte], Array[Byte]] = { - val updatedKafkaParams = KafkaConfigUpdater("executor", kafkaParams.asScala.toMap) - .setAuthenticationConfigIfNeeded() + kafkaParamsWithSecurity = KafkaConfigUpdater("executor", kafkaParams.asScala.toMap) + .setAuthenticationConfigIfNeeded(clusterConfig) .build() - val c = new KafkaConsumer[Array[Byte], Array[Byte]](updatedKafkaParams) + val c = new KafkaConsumer[Array[Byte], Array[Byte]](kafkaParamsWithSecurity) val tps = new ju.ArrayList[TopicPartition]() tps.add(topicPartition) c.assign(tps) @@ -516,13 +524,25 @@ private[kafka010] class KafkaDataConsumer( fetchedData.withNewPoll(records.listIterator, offsetAfterPoll) } - private def getOrRetrieveConsumer(): InternalKafkaConsumer = _consumer match { - case None => - _consumer = Option(consumerPool.borrowObject(cacheKey, kafkaParams)) - require(_consumer.isDefined, "borrowing consumer from pool must always succeed.") - _consumer.get + private[kafka010] def getOrRetrieveConsumer(): InternalKafkaConsumer = { + if (!_consumer.isDefined) { + retrieveConsumer() + } + require(_consumer.isDefined, "Consumer must be defined") + if (!KafkaTokenUtil.isConnectorUsingCurrentToken(_consumer.get.kafkaParamsWithSecurity, + _consumer.get.clusterConfig)) { + logDebug("Cached consumer uses an old delegation token, invalidating.") + releaseConsumer() + consumerPool.invalidateKey(cacheKey) + fetchedDataPool.invalidate(cacheKey) + retrieveConsumer() + } + _consumer.get + } - case Some(consumer) => consumer + private def retrieveConsumer(): Unit = { + _consumer = Option(consumerPool.borrowObject(cacheKey, kafkaParams)) + require(_consumer.isDefined, "borrowing consumer from pool must always succeed.") } private def getOrRetrieveFetchedData(offset: Long): FetchedData = _fetchedData match { diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataWriter.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataWriter.scala index 884773452b2a5..3f8d3d2da5797 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataWriter.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaDataWriter.scala @@ -21,7 +21,7 @@ import java.{util => ju} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.sources.v2.writer._ +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} /** * Dummy commit message. 
The DataSourceV2 framework requires a commit message implementation but we diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 9cd16c8e16249..01f6ba4445162 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -26,10 +26,10 @@ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} +import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} import org.apache.spark.sql.execution.streaming.sources.RateControlMicroBatchStream -import org.apache.spark.sql.kafka010.KafkaSourceProvider.{INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE, INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE} -import org.apache.spark.sql.sources.v2.reader._ -import org.apache.spark.sql.sources.v2.reader.streaming.{MicroBatchStream, Offset} +import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.UninterruptibleThread @@ -64,6 +64,8 @@ private[kafka010] class KafkaMicroBatchStream( private[kafka010] val maxOffsetsPerTrigger = Option(options.get( KafkaSourceProvider.MAX_OFFSET_PER_TRIGGER)).map(_.toLong) + private val includeHeaders = options.getBoolean(INCLUDE_HEADERS, false) + private val rangeCalculator = KafkaOffsetRangeCalculator(options) private var endPartitionOffsets: KafkaSourceOffset = _ @@ -112,7 +114,7 @@ private[kafka010] class KafkaMicroBatchStream( if (deletedPartitions.nonEmpty) { val message = if (kafkaOffsetReader.driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { - s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + s"$deletedPartitions are gone. ${CUSTOM_GROUP_ID_ERROR_MESSAGE}" } else { s"$deletedPartitions are gone. Some data may have been missed." 
} @@ -146,7 +148,8 @@ private[kafka010] class KafkaMicroBatchStream( // Generate factories based on the offset ranges offsetRanges.map { range => - KafkaBatchInputPartition(range, executorKafkaParams, pollTimeoutMs, failOnDataLoss) + KafkaBatchInputPartition(range, executorKafkaParams, pollTimeoutMs, + failOnDataLoss, includeHeaders) }.toArray } @@ -189,6 +192,8 @@ private[kafka010] class KafkaMicroBatchStream( KafkaSourceOffset(kafkaOffsetReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => kafkaOffsetReader.fetchSpecificOffsets(p, reportDataLoss) + case SpecificTimestampRangeLimit(p) => + kafkaOffsetReader.fetchSpecificTimestampBasedOffsets(p, failsOnNoMatchingOffset = true) } metadataLog.add(0, offsets) logInfo(s"Initial offsets: $offsets") diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeLimit.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeLimit.scala index 80a026f4f5d73..d64b5d4f7e9e8 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeLimit.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetRangeLimit.scala @@ -42,6 +42,13 @@ private[kafka010] case object LatestOffsetRangeLimit extends KafkaOffsetRangeLim private[kafka010] case class SpecificOffsetRangeLimit( partitionOffsets: Map[TopicPartition, Long]) extends KafkaOffsetRangeLimit +/** + * Represents the desire to bind to earliest offset which timestamp for the offset is equal or + * greater than specific timestamp. + */ +private[kafka010] case class SpecificTimestampRangeLimit( + topicTimestamps: Map[TopicPartition, Long]) extends KafkaOffsetRangeLimit + private[kafka010] object KafkaOffsetRangeLimit { /** * Used to denote offset range limits that are resolved via Kafka diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala index f3effd5300a79..0179f4dd822f1 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReader.scala @@ -26,12 +26,11 @@ import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration import scala.util.control.NonFatal -import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, KafkaConsumer} +import org.apache.kafka.clients.consumer.{Consumer, ConsumerConfig, KafkaConsumer, OffsetAndTimestamp} import org.apache.kafka.common.TopicPartition import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.types._ import org.apache.spark.util.{ThreadUtils, UninterruptibleThread} /** @@ -127,12 +126,14 @@ private[kafka010] class KafkaOffsetReader( * Fetch the partition offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]] and [[KafkaOffsetRangeLimit]]. 
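Since SpecificTimestampRangeLimit is new here, a short sketch of how it is expected to be reached from user code, assuming the "startingOffsetsByTimestamp" option name that appears in the assertion message below and an active SparkSession named spark; the JSON shape is the one JsonUtils.partitionTimestamps parses (topic -> partition -> timestamp in milliseconds), and broker, topic and timestamp values are placeholders.

    // Minimal sketch: start a stream from per-partition timestamps rather than offsets.
    val startingByTimestamp = """{"topicA": {"0": 1565000000000, "1": 1565000000000}}"""
    val df = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "broker1:9092")          // placeholder broker
      .option("subscribe", "topicA")                               // placeholder topic
      .option("startingOffsetsByTimestamp", startingByTimestamp)   // assumed option name
      .load()
    // Internally this should become SpecificTimestampRangeLimit(Map(TopicPartition -> timestamp)) and
    // be resolved through fetchSpecificTimestampBasedOffsets with failsOnNoMatchingOffset = true.
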
*/ - def fetchPartitionOffsets(offsetRangeLimit: KafkaOffsetRangeLimit): Map[TopicPartition, Long] = { + def fetchPartitionOffsets( + offsetRangeLimit: KafkaOffsetRangeLimit, + isStartingOffsets: Boolean): Map[TopicPartition, Long] = { def validateTopicPartitions(partitions: Set[TopicPartition], partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { assert(partitions == partitionOffsets.keySet, "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest, if you don't care.\n" + + "Use -1 for latest, -2 for earliest.\n" + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") partitionOffsets @@ -148,6 +149,9 @@ private[kafka010] class KafkaOffsetReader( }.toMap case SpecificOffsetRangeLimit(partitionOffsets) => validateTopicPartitions(partitions, partitionOffsets) + case SpecificTimestampRangeLimit(partitionTimestamps) => + fetchSpecificTimestampBasedOffsets(partitionTimestamps, + failsOnNoMatchingOffset = isStartingOffsets).partitionToOffsets } } @@ -162,23 +166,83 @@ private[kafka010] class KafkaOffsetReader( def fetchSpecificOffsets( partitionOffsets: Map[TopicPartition, Long], reportDataLoss: String => Unit): KafkaSourceOffset = { - val fetched = runUninterruptibly { - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionOffsets.keySet, + "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + + "Use -1 for latest, -2 for earliest, if you don't care.\n" + + s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionOffsets") + } - // Call `position` to wait until the potential offset request triggered by `poll(0)` is - // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by - // `poll(0)` may reset offsets that should have been set by another request. - partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => + partitionOffsets + } - consumer.pause(partitions) - assert(partitions.asScala == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest, if you don't care.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions.asScala}") - logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { fetched => + partitionOffsets.foreach { + case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && + off != KafkaOffsetRangeLimit.EARLIEST => + if (fetched(tp) != off) { + reportDataLoss( + s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") + } + case _ => + // no real way to check that beginning or end is reasonable + } + } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + def fetchSpecificTimestampBasedOffsets( + partitionTimestamps: Map[TopicPartition, Long], + failsOnNoMatchingOffset: Boolean): KafkaSourceOffset = { + val fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit = { partitions => + assert(partitions.asScala == partitionTimestamps.keySet, + "If starting/endingOffsetsByTimestamp contains specific offsets, you must specify all " + + s"topics. Specified: ${partitionTimestamps.keySet} Assigned: ${partitions.asScala}") + logDebug(s"Partitions assigned to consumer: $partitions. Seeking to $partitionTimestamps") + } + + val fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long] = { _ => { + val converted = partitionTimestamps.map { case (tp, timestamp) => + tp -> java.lang.Long.valueOf(timestamp) + }.asJava + + val offsetForTime: ju.Map[TopicPartition, OffsetAndTimestamp] = + consumer.offsetsForTimes(converted) + + offsetForTime.asScala.map { case (tp, offsetAndTimestamp) => + if (failsOnNoMatchingOffset) { + assert(offsetAndTimestamp != null, "No offset matched from request of " + + s"topic-partition $tp and timestamp ${partitionTimestamps(tp)}.") + } + + if (offsetAndTimestamp == null) { + tp -> KafkaOffsetRangeLimit.LATEST + } else { + tp -> offsetAndTimestamp.offset() + } + }.toMap + } + } + + val fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit = { _ => } + + fetchSpecificOffsets0(fnAssertParametersWithPartitions, fnRetrievePartitionOffsets, + fnAssertFetchedOffsets) + } + + private def fetchSpecificOffsets0( + fnAssertParametersWithPartitions: ju.Set[TopicPartition] => Unit, + fnRetrievePartitionOffsets: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fnAssertFetchedOffsets: Map[TopicPartition, Long] => Unit): KafkaSourceOffset = { + val fetched = partitionsAssignedToConsumer { + partitions => { + fnAssertParametersWithPartitions(partitions) + + val partitionOffsets = fnRetrievePartitionOffsets(partitions) partitionOffsets.foreach { case (tp, KafkaOffsetRangeLimit.LATEST) => @@ -187,22 +251,15 @@ private[kafka010] class KafkaOffsetReader( consumer.seekToBeginning(ju.Arrays.asList(tp)) case (tp, off) => consumer.seek(tp, off) } + partitionOffsets.map { case (tp, _) => tp -> consumer.position(tp) } } } - partitionOffsets.foreach { - case (tp, off) if off != KafkaOffsetRangeLimit.LATEST && - off != KafkaOffsetRangeLimit.EARLIEST => - if (fetched(tp) != off) { - reportDataLoss( - s"startingOffsets for $tp was $off but consumer reset to ${fetched(tp)}") - } - case _ => - // no real way to check that beginning or end is reasonable - } + fnAssertFetchedOffsets(fetched) + KafkaSourceOffset(fetched) } @@ -210,20 +267,15 @@ private[kafka010] class KafkaOffsetReader( * Fetch the earliest offsets for the topic partitions that are indicated * in the [[ConsumerStrategy]]. 
*/ - def fetchEarliestOffsets(): Map[TopicPartition, Long] = runUninterruptibly { - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - consumer.pause(partitions) - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to the beginning") + def fetchEarliestOffsets(): Map[TopicPartition, Long] = partitionsAssignedToConsumer( + partitions => { + logDebug("Seeking to the beginning") consumer.seekToBeginning(partitions) val partitionOffsets = partitions.asScala.map(p => p -> consumer.position(p)).toMap logDebug(s"Got earliest offsets for partition : $partitionOffsets") partitionOffsets - } - } + }, fetchingEarliestOffset = true) /** * Fetch the latest offsets for the topic partitions that are indicated @@ -240,19 +292,9 @@ private[kafka010] class KafkaOffsetReader( * distinguish this with KAFKA-7703, so we just return whatever we get from Kafka after retrying. */ def fetchLatestOffsets( - knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = runUninterruptibly { - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - - // Call `position` to wait until the potential offset request triggered by `poll(0)` is - // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by - // `poll(0)` may reset offsets that should have been set by another request. - partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) - - consumer.pause(partitions) - logDebug(s"Partitions assigned to consumer: $partitions. Seeking to the end.") + knownOffsets: Option[PartitionOffsetMap]): PartitionOffsetMap = + partitionsAssignedToConsumer { partitions => { + logDebug("Seeking to the end.") if (knownOffsets.isEmpty) { consumer.seekToEnd(partitions) @@ -316,25 +358,40 @@ private[kafka010] class KafkaOffsetReader( if (newPartitions.isEmpty) { Map.empty[TopicPartition, Long] } else { - runUninterruptibly { - withRetriesWithoutInterrupt { - // Poll to get the latest assigned partitions - consumer.poll(0) - val partitions = consumer.assignment() - consumer.pause(partitions) - logDebug(s"\tPartitions assigned to consumer: $partitions") - - // Get the earliest offset of each partition - consumer.seekToBeginning(partitions) - val partitionOffsets = newPartitions.filter { p => - // When deleting topics happen at the same time, some partitions may not be in - // `partitions`. So we need to ignore them - partitions.contains(p) - }.map(p => p -> consumer.position(p)).toMap - logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") - partitionOffsets - } + partitionsAssignedToConsumer(partitions => { + // Get the earliest offset of each partition + consumer.seekToBeginning(partitions) + val partitionOffsets = newPartitions.filter { p => + // When deleting topics happen at the same time, some partitions may not be in + // `partitions`. 
So we need to ignore them + partitions.contains(p) + }.map(p => p -> consumer.position(p)).toMap + logDebug(s"Got earliest offsets for new partitions: $partitionOffsets") + partitionOffsets + }, fetchingEarliestOffset = true) + } + } + + private def partitionsAssignedToConsumer( + body: ju.Set[TopicPartition] => Map[TopicPartition, Long], + fetchingEarliestOffset: Boolean = false) + : Map[TopicPartition, Long] = runUninterruptibly { + + withRetriesWithoutInterrupt { + // Poll to get the latest assigned partitions + consumer.poll(0) + val partitions = consumer.assignment() + + if (!fetchingEarliestOffset) { + // Call `position` to wait until the potential offset request triggered by `poll(0)` is + // done. This is a workaround for KAFKA-7703, which an async `seekToBeginning` triggered by + // `poll(0)` may reset offsets that should have been set by another request. + partitions.asScala.map(p => p -> consumer.position(p)).foreach(_ => {}) } + + consumer.pause(partitions) + logDebug(s"Partitions assigned to consumer: $partitions.") + body(partitions) } } @@ -421,16 +478,3 @@ private[kafka010] class KafkaOffsetReader( _consumer = null // will automatically get reinitialized again } } - -private[kafka010] object KafkaOffsetReader { - - def kafkaSchema: StructType = StructType(Seq( - StructField("key", BinaryType), - StructField("value", BinaryType), - StructField("topic", StringType), - StructField("partition", IntegerType), - StructField("offset", LongType), - StructField("timestamp", TimestampType), - StructField("timestampType", IntegerType) - )) -} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToRowConverter.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToRowConverter.scala new file mode 100644 index 0000000000000..aed099c142bc3 --- /dev/null +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToRowConverter.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.kafka010 + +import java.sql.Timestamp + +import scala.collection.JavaConverters._ + +import org.apache.kafka.clients.consumer.ConsumerRecord + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} +import org.apache.spark.sql.catalyst.util.{DateTimeUtils, GenericArrayData} +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +/** A simple class for converting Kafka ConsumerRecord to InternalRow/UnsafeRow */ +private[kafka010] class KafkaRecordToRowConverter { + import KafkaRecordToRowConverter._ + + private val toUnsafeRowWithoutHeaders = UnsafeProjection.create(schemaWithoutHeaders) + private val toUnsafeRowWithHeaders = UnsafeProjection.create(schemaWithHeaders) + + val toInternalRowWithoutHeaders: Record => InternalRow = + (cr: Record) => InternalRow( + cr.key, cr.value, UTF8String.fromString(cr.topic), cr.partition, cr.offset, + DateTimeUtils.fromJavaTimestamp(new Timestamp(cr.timestamp)), cr.timestampType.id + ) + + val toInternalRowWithHeaders: Record => InternalRow = + (cr: Record) => InternalRow( + cr.key, cr.value, UTF8String.fromString(cr.topic), cr.partition, cr.offset, + DateTimeUtils.fromJavaTimestamp(new Timestamp(cr.timestamp)), cr.timestampType.id, + if (cr.headers.iterator().hasNext) { + new GenericArrayData(cr.headers.iterator().asScala + .map(header => + InternalRow(UTF8String.fromString(header.key()), header.value()) + ).toArray) + } else { + null + } + ) + + def toUnsafeRowWithoutHeadersProjector: Record => UnsafeRow = + (cr: Record) => toUnsafeRowWithoutHeaders(toInternalRowWithoutHeaders(cr)) + + def toUnsafeRowWithHeadersProjector: Record => UnsafeRow = + (cr: Record) => toUnsafeRowWithHeaders(toInternalRowWithHeaders(cr)) + + def toUnsafeRowProjector(includeHeaders: Boolean): Record => UnsafeRow = { + if (includeHeaders) toUnsafeRowWithHeadersProjector else toUnsafeRowWithoutHeadersProjector + } +} + +private[kafka010] object KafkaRecordToRowConverter { + type Record = ConsumerRecord[Array[Byte], Array[Byte]] + + val headersType = ArrayType(StructType(Array( + StructField("key", StringType), + StructField("value", BinaryType)))) + + private val schemaWithoutHeaders = new StructType(Array( + StructField("key", BinaryType), + StructField("value", BinaryType), + StructField("topic", StringType), + StructField("partition", IntegerType), + StructField("offset", LongType), + StructField("timestamp", TimestampType), + StructField("timestampType", IntegerType) + )) + + private val schemaWithHeaders = + new StructType(schemaWithoutHeaders.fields :+ StructField("headers", headersType)) + + def kafkaSchema(includeHeaders: Boolean): StructType = { + if (includeHeaders) schemaWithHeaders else schemaWithoutHeaders + } +} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToUnsafeRowConverter.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToUnsafeRowConverter.scala deleted file mode 100644 index 306ef10b775a9..0000000000000 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRecordToUnsafeRowConverter.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.kafka010 - -import org.apache.kafka.clients.consumer.ConsumerRecord - -import org.apache.spark.sql.catalyst.expressions.UnsafeRow -import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter -import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.unsafe.types.UTF8String - -/** A simple class for converting Kafka ConsumerRecord to UnsafeRow */ -private[kafka010] class KafkaRecordToUnsafeRowConverter { - private val rowWriter = new UnsafeRowWriter(7) - - def toUnsafeRow(record: ConsumerRecord[Array[Byte], Array[Byte]]): UnsafeRow = { - rowWriter.reset() - rowWriter.zeroOutNullBytes() - - if (record.key == null) { - rowWriter.setNullAt(0) - } else { - rowWriter.write(0, record.key) - } - if (record.value == null) { - rowWriter.setNullAt(1) - } else { - rowWriter.write(1, record.value) - } - rowWriter.write(2, UTF8String.fromString(record.topic)) - rowWriter.write(3, record.partition) - rowWriter.write(4, record.offset) - rowWriter.write( - 5, - DateTimeUtils.fromJavaTimestamp(new java.sql.Timestamp(record.timestamp))) - rowWriter.write(6, record.timestampType.id) - rowWriter.getRow() - } -} diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index dc7087821b10c..61479c992039b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -24,10 +24,9 @@ import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.StructType -import org.apache.spark.unsafe.types.UTF8String private[kafka010] class KafkaRelation( @@ -36,6 +35,7 @@ private[kafka010] class KafkaRelation( sourceOptions: CaseInsensitiveMap[String], specifiedKafkaParams: Map[String, String], failOnDataLoss: Boolean, + includeHeaders: Boolean, startingOffsets: KafkaOffsetRangeLimit, endingOffsets: KafkaOffsetRangeLimit) extends BaseRelation with TableScan with Logging { @@ -49,7 +49,9 @@ private[kafka010] class KafkaRelation( (sqlContext.sparkContext.conf.get(NETWORK_TIMEOUT) * 1000L).toString ).toLong - override def schema: StructType = KafkaOffsetReader.kafkaSchema + private val converter = new KafkaRecordToRowConverter() + + override def schema: StructType = KafkaRecordToRowConverter.kafkaSchema(includeHeaders) override def buildScan(): RDD[Row] = { // Each running query should use its own group id. 
Otherwise, the query may be only assigned @@ -66,8 +68,8 @@ private[kafka010] class KafkaRelation( // Leverage the KafkaReader to obtain the relevant partition offsets val (fromPartitionOffsets, untilPartitionOffsets) = { try { - (kafkaOffsetReader.fetchPartitionOffsets(startingOffsets), - kafkaOffsetReader.fetchPartitionOffsets(endingOffsets)) + (kafkaOffsetReader.fetchPartitionOffsets(startingOffsets, isStartingOffsets = true), + kafkaOffsetReader.fetchPartitionOffsets(endingOffsets, isStartingOffsets = false)) } finally { kafkaOffsetReader.close() } @@ -100,18 +102,14 @@ private[kafka010] class KafkaRelation( // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays. val executorKafkaParams = KafkaSourceProvider.kafkaParamsForExecutors(specifiedKafkaParams, uniqueGroupId) + val toInternalRow = if (includeHeaders) { + converter.toInternalRowWithHeaders + } else { + converter.toInternalRowWithoutHeaders + } val rdd = new KafkaSourceRDD( sqlContext.sparkContext, executorKafkaParams, offsetRanges, - pollTimeoutMs, failOnDataLoss).map { cr => - InternalRow( - cr.key, - cr.value, - UTF8String.fromString(cr.topic), - cr.partition, - cr.offset, - DateTimeUtils.fromJavaTimestamp(new java.sql.Timestamp(cr.timestamp)), - cr.timestampType.id) - } + pollTimeoutMs, failOnDataLoss).map(toInternalRow) sqlContext.internalCreateDataFrame(rdd.setName("kafka"), schema).rdd } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index d1a35ec53bc94..e1392b6215d3a 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -31,12 +31,11 @@ import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.kafka010.KafkaSource._ -import org.apache.spark.sql.kafka010.KafkaSourceProvider.{INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_FALSE, INSTRUCTION_FOR_FAIL_ON_DATA_LOSS_TRUE} +import org.apache.spark.sql.kafka010.KafkaSourceProvider._ import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String /** * A [[Source]] that reads data from Kafka using the following design. 
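Editorial aside — a minimal usage sketch (not part of the patch) of how the new includeHeaders option threaded through the KafkaRelation and KafkaSource changes above is expected to surface on the batch read path; the topic name and broker address below are placeholders:

    // Batch read with Kafka record headers exposed as an extra column.
    // The schema follows KafkaRecordToRowConverter.kafkaSchema(includeHeaders = true):
    // the usual key/value/topic/partition/offset/timestamp/timestampType columns
    // plus headers: array<struct<key: string, value: binary>>.
    val withHeaders = spark.read
      .format("kafka")
      .option("kafka.bootstrap.servers", "broker1:9092")  // placeholder address
      .option("subscribe", "events")                       // placeholder topic
      .option("includeHeaders", "true")                    // defaults to "false"
      .load()
      .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)", "headers")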
@@ -84,13 +83,15 @@ private[kafka010] class KafkaSource( private val sc = sqlContext.sparkContext - private val pollTimeoutMs = sourceOptions.getOrElse( - KafkaSourceProvider.CONSUMER_POLL_TIMEOUT, - (sc.conf.get(NETWORK_TIMEOUT) * 1000L).toString - ).toLong + private val pollTimeoutMs = + sourceOptions.getOrElse(CONSUMER_POLL_TIMEOUT, (sc.conf.get(NETWORK_TIMEOUT) * 1000L).toString) + .toLong private val maxOffsetsPerTrigger = - sourceOptions.get(KafkaSourceProvider.MAX_OFFSET_PER_TRIGGER).map(_.toLong) + sourceOptions.get(MAX_OFFSET_PER_TRIGGER).map(_.toLong) + + private val includeHeaders = + sourceOptions.getOrElse(INCLUDE_HEADERS, "false").toBoolean /** * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only @@ -104,6 +105,8 @@ private[kafka010] class KafkaSource( case EarliestOffsetRangeLimit => KafkaSourceOffset(kafkaReader.fetchEarliestOffsets()) case LatestOffsetRangeLimit => KafkaSourceOffset(kafkaReader.fetchLatestOffsets(None)) case SpecificOffsetRangeLimit(p) => kafkaReader.fetchSpecificOffsets(p, reportDataLoss) + case SpecificTimestampRangeLimit(p) => + kafkaReader.fetchSpecificTimestampBasedOffsets(p, failsOnNoMatchingOffset = true) } metadataLog.add(0, offsets) logInfo(s"Initial offsets: $offsets") @@ -113,7 +116,9 @@ private[kafka010] class KafkaSource( private var currentPartitionOffsets: Option[Map[TopicPartition, Long]] = None - override def schema: StructType = KafkaOffsetReader.kafkaSchema + private val converter = new KafkaRecordToRowConverter() + + override def schema: StructType = KafkaRecordToRowConverter.kafkaSchema(includeHeaders) /** Returns the maximum available offset for this source. */ override def getOffset: Option[Offset] = { @@ -223,7 +228,7 @@ private[kafka010] class KafkaSource( val deletedPartitions = fromPartitionOffsets.keySet.diff(untilPartitionOffsets.keySet) if (deletedPartitions.nonEmpty) { val message = if (kafkaReader.driverKafkaParams.containsKey(ConsumerConfig.GROUP_ID_CONFIG)) { - s"$deletedPartitions are gone. ${KafkaSourceProvider.CUSTOM_GROUP_ID_ERROR_MESSAGE}" + s"$deletedPartitions are gone. ${CUSTOM_GROUP_ID_ERROR_MESSAGE}" } else { s"$deletedPartitions are gone. Some data may have been missed." } @@ -267,16 +272,14 @@ private[kafka010] class KafkaSource( }.toArray // Create an RDD that reads from Kafka and get the (key, value) pair as byte arrays. 
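    // Editorial sketch (not part of the patch): what the converter functions used just below
    // produce per ConsumerRecord. For a record cr carrying one header ("a", "b".getBytes),
    // converter.toInternalRowWithHeaders(cr) yields roughly
    //   InternalRow(cr.key, cr.value, UTF8String(cr.topic), cr.partition, cr.offset,
    //               timestampMicros, cr.timestampType.id,
    //               GenericArrayData(Array(InternalRow(UTF8String("a"), "b".getBytes))))
    // whereas toInternalRowWithoutHeaders omits the trailing headers array; a record with no
    // headers maps the headers column to null.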
- val rdd = new KafkaSourceRDD( - sc, executorKafkaParams, offsetRanges, pollTimeoutMs, failOnDataLoss).map { cr => - InternalRow( - cr.key, - cr.value, - UTF8String.fromString(cr.topic), - cr.partition, - cr.offset, - DateTimeUtils.fromJavaTimestamp(new java.sql.Timestamp(cr.timestamp)), - cr.timestampType.id) + val rdd = if (includeHeaders) { + new KafkaSourceRDD( + sc, executorKafkaParams, offsetRanges, pollTimeoutMs, failOnDataLoss) + .map(converter.toInternalRowWithHeaders) + } else { + new KafkaSourceRDD( + sc, executorKafkaParams, offsetRanges, pollTimeoutMs, failOnDataLoss) + .map(converter.toInternalRowWithoutHeaders) } logInfo("GetBatch generating RDD of offset range: " + diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceOffset.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceOffset.scala index 90d70439c5329..b9674a30aee39 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceOffset.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceOffset.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.kafka010 import org.apache.kafka.common.TopicPartition +import org.apache.spark.sql.connector.read.streaming.PartitionOffset import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset} -import org.apache.spark.sql.sources.v2.reader.streaming.PartitionOffset /** * An [[Offset]] for the [[KafkaSource]]. This one tracks all partitions of subscribed topics and diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala index c3f0be4be96e2..c15f08d78741d 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceProvider.scala @@ -30,14 +30,13 @@ import org.apache.spark.internal.Logging import org.apache.spark.kafka010.KafkaConfigUpdater import org.apache.spark.sql.{AnalysisException, DataFrame, SaveMode, SQLContext} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsWrite, Table, TableCapability, TableProvider} +import org.apache.spark.sql.connector.read.{Batch, Scan, ScanBuilder} +import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} +import org.apache.spark.sql.connector.write.{BatchWrite, WriteBuilder} +import org.apache.spark.sql.connector.write.streaming.StreamingWrite import org.apache.spark.sql.execution.streaming.{Sink, Source} import org.apache.spark.sql.sources._ -import org.apache.spark.sql.sources.v2._ -import org.apache.spark.sql.sources.v2.TableCapability._ -import org.apache.spark.sql.sources.v2.reader.{Batch, Scan, ScanBuilder} -import org.apache.spark.sql.sources.v2.reader.streaming.{ContinuousStream, MicroBatchStream} -import org.apache.spark.sql.sources.v2.writer.{BatchWrite, WriteBuilder} -import org.apache.spark.sql.sources.v2.writer.streaming.StreamingWrite import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -70,7 +69,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val caseInsensitiveParameters = CaseInsensitiveMap(parameters) validateStreamOptions(caseInsensitiveParameters) 
require(schema.isEmpty, "Kafka source has a fixed schema and cannot be set with a custom one") - (shortName(), KafkaOffsetReader.kafkaSchema) + val includeHeaders = caseInsensitiveParameters.getOrElse(INCLUDE_HEADERS, "false").toBoolean + (shortName(), KafkaRecordToRowConverter.kafkaSchema(includeHeaders)) } override def createSource( @@ -89,7 +89,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val specifiedKafkaParams = convertToSpecifiedParams(caseInsensitiveParameters) val startingStreamOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveParameters, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) + caseInsensitiveParameters, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) val kafkaOffsetReader = new KafkaOffsetReader( strategy(caseInsensitiveParameters), @@ -108,7 +109,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister } override def getTable(options: CaseInsensitiveStringMap): KafkaTable = { - new KafkaTable + val includeHeaders = options.getBoolean(INCLUDE_HEADERS, false) + new KafkaTable(includeHeaders) } /** @@ -125,19 +127,24 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val specifiedKafkaParams = convertToSpecifiedParams(caseInsensitiveParameters) val startingRelationOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveParameters, STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) + caseInsensitiveParameters, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) assert(startingRelationOffsets != LatestOffsetRangeLimit) val endingRelationOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveParameters, ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) + caseInsensitiveParameters, ENDING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) assert(endingRelationOffsets != EarliestOffsetRangeLimit) + val includeHeaders = caseInsensitiveParameters.getOrElse(INCLUDE_HEADERS, "false").toBoolean + new KafkaRelation( sqlContext, strategy(caseInsensitiveParameters), sourceOptions = caseInsensitiveParameters, specifiedKafkaParams = specifiedKafkaParams, failOnDataLoss = failOnDataLoss(caseInsensitiveParameters), + includeHeaders = includeHeaders, startingOffsets = startingRelationOffsets, endingOffsets = endingRelationOffsets) } @@ -317,13 +324,17 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister // Stream specific options params.get(ENDING_OFFSETS_OPTION_KEY).map(_ => throw new IllegalArgumentException("ending offset not valid in streaming queries")) + params.get(ENDING_OFFSETS_BY_TIMESTAMP_OPTION_KEY).map(_ => + throw new IllegalArgumentException("ending timestamp not valid in streaming queries")) + validateGeneralOptions(params) } private def validateBatchOptions(params: CaseInsensitiveMap[String]) = { // Batch specific options KafkaSourceProvider.getKafkaOffsetRangeLimit( - params, STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) match { + params, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, STARTING_OFFSETS_OPTION_KEY, + EarliestOffsetRangeLimit) match { case EarliestOffsetRangeLimit => // good to go case LatestOffsetRangeLimit => throw new IllegalArgumentException("starting offset can't be latest " + @@ -335,10 +346,12 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister "be latest for batch queries on Kafka") case _ => // ignore } + case _: 
SpecificTimestampRangeLimit => // good to go } KafkaSourceProvider.getKafkaOffsetRangeLimit( - params, ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) match { + params, ENDING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, ENDING_OFFSETS_OPTION_KEY, + LatestOffsetRangeLimit) match { case EarliestOffsetRangeLimit => throw new IllegalArgumentException("ending offset can't be earliest " + "for batch queries on Kafka") @@ -350,6 +363,7 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister "earliest for batch queries on Kafka") case _ => // ignore } + case _: SpecificTimestampRangeLimit => // good to go } validateGeneralOptions(params) @@ -360,13 +374,14 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister } } - class KafkaTable extends Table with SupportsRead with SupportsWrite { + class KafkaTable(includeHeaders: Boolean) extends Table with SupportsRead with SupportsWrite { override def name(): String = "KafkaTable" - override def schema(): StructType = KafkaOffsetReader.kafkaSchema + override def schema(): StructType = KafkaRecordToRowConverter.kafkaSchema(includeHeaders) override def capabilities(): ju.Set[TableCapability] = { + import TableCapability._ // ACCEPT_ANY_SCHEMA is needed because of the following reasons: // * Kafka writer validates the schema instead of the SQL analyzer (the schema is fixed) // * Read schema differs from write schema (please see Kafka integration guide) @@ -403,8 +418,11 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister } class KafkaScan(options: CaseInsensitiveStringMap) extends Scan { + val includeHeaders = options.getBoolean(INCLUDE_HEADERS, false) - override def readSchema(): StructType = KafkaOffsetReader.kafkaSchema + override def readSchema(): StructType = { + KafkaRecordToRowConverter.kafkaSchema(includeHeaders) + } override def toBatch(): Batch = { val caseInsensitiveOptions = CaseInsensitiveMap(options.asScala.toMap) @@ -412,10 +430,12 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val specifiedKafkaParams = convertToSpecifiedParams(caseInsensitiveOptions) val startingRelationOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveOptions, STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) + caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit) val endingRelationOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveOptions, ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) + caseInsensitiveOptions, ENDING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) new KafkaBatch( strategy(caseInsensitiveOptions), @@ -423,7 +443,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister specifiedKafkaParams, failOnDataLoss(caseInsensitiveOptions), startingRelationOffsets, - endingRelationOffsets) + endingRelationOffsets, + includeHeaders) } override def toMicroBatchStream(checkpointLocation: String): MicroBatchStream = { @@ -437,7 +458,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val specifiedKafkaParams = convertToSpecifiedParams(caseInsensitiveOptions) val startingStreamOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveOptions, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) + caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) val kafkaOffsetReader = new KafkaOffsetReader( 
strategy(caseInsensitiveOptions), @@ -465,7 +487,8 @@ private[kafka010] class KafkaSourceProvider extends DataSourceRegister val specifiedKafkaParams = convertToSpecifiedParams(caseInsensitiveOptions) val startingStreamOffsets = KafkaSourceProvider.getKafkaOffsetRangeLimit( - caseInsensitiveOptions, STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) + caseInsensitiveOptions, STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY, + STARTING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit) val kafkaOffsetReader = new KafkaOffsetReader( strategy(caseInsensitiveOptions), @@ -491,6 +514,8 @@ private[kafka010] object KafkaSourceProvider extends Logging { private val STRATEGY_OPTION_KEYS = Set(SUBSCRIBE, SUBSCRIBE_PATTERN, ASSIGN) private[kafka010] val STARTING_OFFSETS_OPTION_KEY = "startingoffsets" private[kafka010] val ENDING_OFFSETS_OPTION_KEY = "endingoffsets" + private[kafka010] val STARTING_OFFSETS_BY_TIMESTAMP_OPTION_KEY = "startingoffsetsbytimestamp" + private[kafka010] val ENDING_OFFSETS_BY_TIMESTAMP_OPTION_KEY = "endingoffsetsbytimestamp" private val FAIL_ON_DATA_LOSS_OPTION_KEY = "failondataloss" private[kafka010] val MIN_PARTITIONS_OPTION_KEY = "minpartitions" private[kafka010] val MAX_OFFSET_PER_TRIGGER = "maxoffsetspertrigger" @@ -498,6 +523,7 @@ private[kafka010] object KafkaSourceProvider extends Logging { private[kafka010] val FETCH_OFFSET_RETRY_INTERVAL_MS = "fetchoffset.retryintervalms" private[kafka010] val CONSUMER_POLL_TIMEOUT = "kafkaconsumer.polltimeoutms" private val GROUP_ID_PREFIX = "groupidprefix" + private[kafka010] val INCLUDE_HEADERS = "includeheaders" val TOPIC_OPTION_KEY = "topic" @@ -533,15 +559,20 @@ private[kafka010] object KafkaSourceProvider extends Logging { def getKafkaOffsetRangeLimit( params: CaseInsensitiveMap[String], + offsetByTimestampOptionKey: String, offsetOptionKey: String, defaultOffsets: KafkaOffsetRangeLimit): KafkaOffsetRangeLimit = { - params.get(offsetOptionKey).map(_.trim) match { - case Some(offset) if offset.toLowerCase(Locale.ROOT) == "latest" => - LatestOffsetRangeLimit - case Some(offset) if offset.toLowerCase(Locale.ROOT) == "earliest" => - EarliestOffsetRangeLimit - case Some(json) => SpecificOffsetRangeLimit(JsonUtils.partitionOffsets(json)) - case None => defaultOffsets + params.get(offsetByTimestampOptionKey).map(_.trim) match { + case Some(json) => SpecificTimestampRangeLimit(JsonUtils.partitionTimestamps(json)) + case None => + params.get(offsetOptionKey).map(_.trim) match { + case Some(offset) if offset.toLowerCase(Locale.ROOT) == "latest" => + LatestOffsetRangeLimit + case Some(offset) if offset.toLowerCase(Locale.ROOT) == "earliest" => + EarliestOffsetRangeLimit + case Some(json) => SpecificOffsetRangeLimit(JsonUtils.partitionOffsets(json)) + case None => defaultOffsets + } } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWrite.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWrite.scala index 6dd1d2984a96e..2b50b771e694e 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWrite.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaStreamingWrite.scala @@ -20,9 +20,9 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage} +import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import 
org.apache.spark.sql.kafka010.KafkaWriter.validateQuery -import org.apache.spark.sql.sources.v2.writer._ -import org.apache.spark.sql.sources.v2.writer.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.types.StructType /** diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriteTask.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriteTask.scala index 041fac7717635..b423ddc959c1b 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriteTask.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriteTask.scala @@ -19,9 +19,13 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} +import scala.collection.JavaConverters._ + import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} +import org.apache.kafka.common.header.Header +import org.apache.kafka.common.header.internals.RecordHeader -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Literal, UnsafeProjection} import org.apache.spark.sql.types.{BinaryType, StringType} @@ -88,7 +92,17 @@ private[kafka010] abstract class KafkaRowWriter( throw new NullPointerException(s"null topic present in the data. Use the " + s"${KafkaSourceProvider.TOPIC_OPTION_KEY} option for setting a default topic.") } - val record = new ProducerRecord[Array[Byte], Array[Byte]](topic.toString, key, value) + val record = if (projectedRow.isNullAt(3)) { + new ProducerRecord[Array[Byte], Array[Byte]](topic.toString, null, key, value) + } else { + val headerArray = projectedRow.getArray(3) + val headers = (0 until headerArray.numElements()).map { i => + val struct = headerArray.getStruct(i, 2) + new RecordHeader(struct.getUTF8String(0).toString, struct.getBinary(1)) + .asInstanceOf[Header] + } + new ProducerRecord[Array[Byte], Array[Byte]](topic.toString, null, key, value, headers.asJava) + } producer.send(record, callback) } @@ -131,9 +145,26 @@ private[kafka010] abstract class KafkaRowWriter( throw new IllegalStateException(s"${KafkaWriter.VALUE_ATTRIBUTE_NAME} " + s"attribute unsupported type ${t.catalogString}") } + val headersExpression = inputSchema + .find(_.name == KafkaWriter.HEADERS_ATTRIBUTE_NAME).getOrElse( + Literal(CatalystTypeConverters.convertToCatalyst(null), + KafkaRecordToRowConverter.headersType) + ) + headersExpression.dataType match { + case KafkaRecordToRowConverter.headersType => // good + case t => + throw new IllegalStateException(s"${KafkaWriter.HEADERS_ATTRIBUTE_NAME} " + + s"attribute unsupported type ${t.catalogString}") + } UnsafeProjection.create( - Seq(topicExpression, Cast(keyExpression, BinaryType), - Cast(valueExpression, BinaryType)), inputSchema) + Seq( + topicExpression, + Cast(keyExpression, BinaryType), + Cast(valueExpression, BinaryType), + headersExpression + ), + inputSchema + ) } } diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala index e1a9191cc5a84..bbb060356f730 100644 --- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala +++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaWriter.scala @@ -21,9 +21,10 @@ import java.{util => ju} import 
org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} -import org.apache.spark.sql.types.{BinaryType, StringType} +import org.apache.spark.sql.execution.QueryExecution +import org.apache.spark.sql.types.{BinaryType, MapType, StringType} import org.apache.spark.util.Utils /** @@ -39,6 +40,7 @@ private[kafka010] object KafkaWriter extends Logging { val TOPIC_ATTRIBUTE_NAME: String = "topic" val KEY_ATTRIBUTE_NAME: String = "key" val VALUE_ATTRIBUTE_NAME: String = "value" + val HEADERS_ATTRIBUTE_NAME: String = "headers" override def toString: String = "KafkaWriter" @@ -75,6 +77,15 @@ private[kafka010] object KafkaWriter extends Logging { throw new AnalysisException(s"$VALUE_ATTRIBUTE_NAME attribute type " + s"must be a ${StringType.catalogString} or ${BinaryType.catalogString}") } + schema.find(_.name == HEADERS_ATTRIBUTE_NAME).getOrElse( + Literal(CatalystTypeConverters.convertToCatalyst(null), + KafkaRecordToRowConverter.headersType) + ).dataType match { + case KafkaRecordToRowConverter.headersType => // good + case _ => + throw new AnalysisException(s"$HEADERS_ATTRIBUTE_NAME attribute type " + + s"must be a ${KafkaRecordToRowConverter.headersType.catalogString}") + } } def write( diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/commits/0 b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/metadata b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/metadata new file mode 100644 index 0000000000000..f1b5ab7aa17f0 --- /dev/null +++ b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/metadata @@ -0,0 +1 @@ +{"id":"fc415a71-f0a2-4c3c-aeaf-f9e258c3f726"} \ No newline at end of file diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/offsets/0 b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/offsets/0 new file mode 100644 index 0000000000000..5dbadea57acbe --- /dev/null +++ b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1568508285207,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +{"spark-test-topic-2b8619f5-d3c4-4c2d-b5d1-8d9d9458aa62":{"2":3,"4":3,"1":3,"3":3,"0":3}} \ No newline at end 
of file diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/sources/0/0 b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/sources/0/0 new file mode 100644 index 0000000000000..8cf9f8e009ce8 Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/sources/0/0 differ diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/0/1.delta b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/0/1.delta new file mode 100644 index 0000000000000..5815bbdcc2467 Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/0/1.delta differ diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/1/1.delta b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/1/1.delta new file mode 100644 index 0000000000000..e1a065b2b1c78 Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/1/1.delta differ diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/2/1.delta b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/2/1.delta new file mode 100644 index 0000000000000..cce14294e0044 Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/2/1.delta differ diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/3/1.delta b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/3/1.delta new file mode 100644 index 0000000000000..57063019503bc Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/3/1.delta differ diff --git a/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/4/1.delta b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/4/1.delta new file mode 100644 index 0000000000000..e8b1e4bdc8dba Binary files /dev/null and b/external/kafka-0-10-sql/src/test/resources/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/state/0/4/1.delta differ diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala index 80f9a1b410d2c..122fe752615ad 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala +++ 
b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDataConsumerSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.kafka010 +import java.{util => ju} +import java.nio.charset.StandardCharsets import java.util.concurrent.{Executors, TimeUnit} import scala.collection.JavaConverters._ @@ -29,10 +31,14 @@ import org.apache.kafka.common.serialization.ByteArrayDeserializer import org.scalatest.PrivateMethodTester import org.apache.spark.{TaskContext, TaskContextImpl} +import org.apache.spark.kafka010.KafkaDelegationTokenTest import org.apache.spark.sql.kafka010.KafkaDataConsumer.CacheKey import org.apache.spark.sql.test.SharedSparkSession -class KafkaDataConsumerSuite extends SharedSparkSession with PrivateMethodTester { +class KafkaDataConsumerSuite + extends SharedSparkSession + with PrivateMethodTester + with KafkaDelegationTokenTest { protected var testUtils: KafkaTestUtils = _ private val topic = "topic" + Random.nextInt() @@ -65,6 +71,8 @@ class KafkaDataConsumerSuite extends SharedSparkSession with PrivateMethodTester private var consumerPool: InternalKafkaConsumerPool = _ override def beforeEach(): Unit = { + super.beforeEach() + fetchedDataPool = { val fetchedDataPoolMethod = PrivateMethod[FetchedDataPool]('fetchedDataPool) KafkaDataConsumer.invokePrivate(fetchedDataPoolMethod()) @@ -91,53 +99,93 @@ class KafkaDataConsumerSuite extends SharedSparkSession with PrivateMethodTester test("new KafkaDataConsumer instance in case of Task retry") { try { val kafkaParams = getKafkaParams() - val key = new CacheKey(groupId, topicPartition) + val key = CacheKey(groupId, topicPartition) val context1 = new TaskContextImpl(0, 0, 0, 0, 0, null, null, null) TaskContext.setTaskContext(context1) - val consumer1 = KafkaDataConsumer.acquire(topicPartition, kafkaParams) - - // any method call which requires consumer is necessary - consumer1.getAvailableOffsetRange() - - val consumer1Underlying = consumer1._consumer - assert(consumer1Underlying.isDefined) - - consumer1.release() - - assert(consumerPool.size(key) === 1) - // check whether acquired object is available in pool - val pooledObj = consumerPool.borrowObject(key, kafkaParams) - assert(consumer1Underlying.get.eq(pooledObj)) - consumerPool.returnObject(pooledObj) + val consumer1Underlying = initSingleConsumer(kafkaParams, key) val context2 = new TaskContextImpl(0, 0, 0, 0, 1, null, null, null) TaskContext.setTaskContext(context2) - val consumer2 = KafkaDataConsumer.acquire(topicPartition, kafkaParams) - - // any method call which requires consumer is necessary - consumer2.getAvailableOffsetRange() + val consumer2Underlying = initSingleConsumer(kafkaParams, key) - val consumer2Underlying = consumer2._consumer - assert(consumer2Underlying.isDefined) // here we expect different consumer as pool will invalidate for task reattempt - assert(consumer2Underlying.get.ne(consumer1Underlying.get)) + assert(consumer2Underlying.ne(consumer1Underlying)) + } finally { + TaskContext.unset() + } + } - consumer2.release() + test("same KafkaDataConsumer instance in case of same token") { + try { + val kafkaParams = getKafkaParams() + val key = new CacheKey(groupId, topicPartition) + + val context = new TaskContextImpl(0, 0, 0, 0, 0, null, null, null) + TaskContext.setTaskContext(context) + setSparkEnv( + Map( + s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers + ) + ) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + val consumer1Underlying = initSingleConsumer(kafkaParams, key) + val 
consumer2Underlying = initSingleConsumer(kafkaParams, key) + + assert(consumer2Underlying.eq(consumer1Underlying)) + } finally { + TaskContext.unset() + } + } - // The first consumer should be removed from cache, but the consumer after invalidate - // should be cached. - assert(consumerPool.size(key) === 1) - val pooledObj2 = consumerPool.borrowObject(key, kafkaParams) - assert(consumer2Underlying.get.eq(pooledObj2)) - consumerPool.returnObject(pooledObj2) + test("new KafkaDataConsumer instance in case of token renewal") { + try { + val kafkaParams = getKafkaParams() + val key = new CacheKey(groupId, topicPartition) + + val context = new TaskContextImpl(0, 0, 0, 0, 0, null, null, null) + TaskContext.setTaskContext(context) + setSparkEnv( + Map( + s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers + ) + ) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + val consumer1Underlying = initSingleConsumer(kafkaParams, key) + addTokenToUGI(tokenService1, tokenId2, tokenPassword2) + val consumer2Underlying = initSingleConsumer(kafkaParams, key) + + assert(consumer2Underlying.ne(consumer1Underlying)) } finally { TaskContext.unset() } } + private def initSingleConsumer( + kafkaParams: ju.Map[String, Object], + key: CacheKey): InternalKafkaConsumer = { + val consumer = KafkaDataConsumer.acquire(topicPartition, kafkaParams) + + // any method call which requires consumer is necessary + consumer.getOrRetrieveConsumer() + + val consumerUnderlying = consumer._consumer + assert(consumerUnderlying.isDefined) + + consumer.release() + + assert(consumerPool.size(key) === 1) + // check whether acquired object is available in pool + val pooledObj = consumerPool.borrowObject(key, kafkaParams) + assert(consumerUnderlying.get.eq(pooledObj)) + consumerPool.returnObject(pooledObj) + + consumerUnderlying.get + } + test("SPARK-23623: concurrent use of KafkaDataConsumer") { - val data: immutable.IndexedSeq[String] = prepareTestTopicHavingTestMessages(topic) + val data: immutable.IndexedSeq[(String, Seq[(String, Array[Byte])])] = + prepareTestTopicHavingTestMessages(topic) val topicPartition = new TopicPartition(topic, 0) val kafkaParams = getKafkaParams() @@ -157,10 +205,22 @@ class KafkaDataConsumerSuite extends SharedSparkSession with PrivateMethodTester try { val range = consumer.getAvailableOffsetRange() val rcvd = range.earliest until range.latest map { offset => - val bytes = consumer.get(offset, Long.MaxValue, 10000, failOnDataLoss = false).value() - new String(bytes) + val record = consumer.get(offset, Long.MaxValue, 10000, failOnDataLoss = false) + val value = new String(record.value(), StandardCharsets.UTF_8) + val headers = record.headers().toArray.map(header => (header.key(), header.value())).toSeq + (value, headers) + } + data.zip(rcvd).foreach { case (expected, actual) => + // value + assert(expected._1 === actual._1) + // headers + expected._2.zip(actual._2).foreach { case (l, r) => + // header key + assert(l._1 === r._1) + // header value + assert(l._2 === r._2) + } } - assert(rcvd == data) } catch { case e: Throwable => error = e @@ -307,9 +367,12 @@ class KafkaDataConsumerSuite extends SharedSparkSession with PrivateMethodTester } private def prepareTestTopicHavingTestMessages(topic: String) = { - val data = (1 to 1000).map(_.toString) + val data = (1 to 1000).map(i => (i.toString, Seq[(String, Array[Byte])]())) testUtils.createTopic(topic, 1) - testUtils.sendMessages(topic, data.toArray) + val messages = data.map { case (value, hdrs) => + new RecordBuilder(topic, 
value).headers(hdrs).build() + } + testUtils.sendMessages(messages) data } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala index 9850a91f34f63..306483825ae3b 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaDelegationTokenSuite.scala @@ -82,7 +82,6 @@ class KafkaDelegationTokenSuite extends StreamTest with SharedSparkSession with .format("kafka") .option("checkpointLocation", checkpointDir.getCanonicalPath) .option("kafka.bootstrap.servers", testUtils.brokerAddress) - .option("kafka.security.protocol", SASL_PLAINTEXT.name) .option("topic", topic) .start() @@ -99,7 +98,6 @@ class KafkaDelegationTokenSuite extends StreamTest with SharedSparkSession with val streamingDf = spark.readStream .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) - .option("kafka.security.protocol", SASL_PLAINTEXT.name) .option("startingOffsets", s"earliest") .option("subscribe", topic) .load() diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index ae8a6886b2b4d..3ee59e57a6edf 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -28,13 +28,15 @@ import scala.collection.JavaConverters._ import scala.io.Source import scala.util.Random +import org.apache.commons.io.FileUtils import org.apache.kafka.clients.producer.{ProducerRecord, RecordMetadata} import org.apache.kafka.common.TopicPartition import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ -import org.apache.spark.sql.{Dataset, ForeachWriter, SparkSession} +import org.apache.spark.sql.{Dataset, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap +import org.apache.spark.sql.connector.read.streaming.SparkDataStream import org.apache.spark.sql.execution.datasources.v2.StreamingDataSourceV2Relation import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.streaming._ @@ -42,11 +44,11 @@ import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.functions.{count, window} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.kafka010.KafkaSourceProvider._ -import org.apache.spark.sql.sources.v2.reader.streaming.SparkDataStream import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.streaming.util.StreamManualClock import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.util.Utils abstract class KafkaSourceTest extends StreamTest with SharedSparkSession with KafkaTest { @@ -677,7 +679,8 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { }) } - private def testGroupId(groupIdKey: String, validateGroupId: (String, Iterable[String]) => Unit) { + private def testGroupId(groupIdKey: String, + validateGroupId: (String, Iterable[String]) => Unit): Unit = { // Tests code 
path KafkaSourceProvider.{sourceSchema(.), createSource(.)} // as well as KafkaOffsetReader.createConsumer(.) val topic = newTopic() @@ -1162,6 +1165,63 @@ class KafkaMicroBatchV2SourceSuite extends KafkaMicroBatchSourceSuiteBase { intercept[IllegalArgumentException] { test(minPartitions = "-1", 1, true) } } + test("default config of includeHeader doesn't break existing query from Spark 2.4") { + import testImplicits._ + + // This topic name is migrated from Spark 2.4.3 test run + val topic = "spark-test-topic-2b8619f5-d3c4-4c2d-b5d1-8d9d9458aa62" + // create same topic and messages as test run + testUtils.createTopic(topic, partitions = 5, overwrite = true) + testUtils.sendMessages(topic, Array(-20, -21, -22).map(_.toString), Some(0)) + testUtils.sendMessages(topic, Array(-10, -11, -12).map(_.toString), Some(1)) + testUtils.sendMessages(topic, Array(0, 1, 2).map(_.toString), Some(2)) + testUtils.sendMessages(topic, Array(10, 11, 12).map(_.toString), Some(3)) + testUtils.sendMessages(topic, Array(20, 21, 22).map(_.toString), Some(4)) + require(testUtils.getLatestOffsets(Set(topic)).size === 5) + + val headers = Seq(("a", "b".getBytes(UTF_8)), ("c", "d".getBytes(UTF_8))) + (31 to 35).map { num => + new RecordBuilder(topic, num.toString).partition(num - 31).headers(headers).build() + }.foreach { rec => testUtils.sendMessage(rec) } + + val kafka = spark + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("subscribePattern", topic) + .option("startingOffsets", "earliest") + .load() + + val query = kafka.dropDuplicates() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + .map(kv => kv._2.toInt + 1) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-2.4.3-kafka-include-headers-default/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. 
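    // Editorial note (not part of the patch): this test relies on includeHeaders defaulting to
    // "false", so KafkaRecordToRowConverter.kafkaSchema(false) matches the fixed 7-column schema
    // that the Spark 2.4.3 query behind this checkpoint was planned against, and the restored
    // dropDuplicates state sees an unchanged input schema.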
+ FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + testStream(query)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* + Note: The checkpoint was generated using the following input in Spark version 2.4.3 + testUtils.createTopic(topic, partitions = 5, overwrite = true) + + testUtils.sendMessages(topic, Array(-20, -21, -22).map(_.toString), Some(0)) + testUtils.sendMessages(topic, Array(-10, -11, -12).map(_.toString), Some(1)) + testUtils.sendMessages(topic, Array(0, 1, 2).map(_.toString), Some(2)) + testUtils.sendMessages(topic, Array(10, 11, 12).map(_.toString), Some(3)) + testUtils.sendMessages(topic, Array(20, 21, 22).map(_.toString), Some(4)) + */ + makeSureGetOffsetCalled, + CheckNewAnswer(32, 33, 34, 35, 36) + ) + } } abstract class KafkaSourceSuiteBase extends KafkaSourceTest { @@ -1219,6 +1279,16 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { "failOnDataLoss" -> failOnDataLoss.toString) } + test(s"assign from specific timestamps (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromSpecificTimestamps( + topic, + failOnDataLoss = failOnDataLoss, + addPartitions = false, + "assign" -> assignString(topic, 0 to 4), + "failOnDataLoss" -> failOnDataLoss.toString) + } + test(s"subscribing topic by name from latest offsets (failOnDataLoss: $failOnDataLoss)") { val topic = newTopic() testFromLatestOffsets( @@ -1242,6 +1312,12 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { testFromSpecificOffsets(topic, failOnDataLoss = failOnDataLoss, "subscribe" -> topic) } + test(s"subscribing topic by name from specific timestamps (failOnDataLoss: $failOnDataLoss)") { + val topic = newTopic() + testFromSpecificTimestamps(topic, failOnDataLoss = failOnDataLoss, addPartitions = true, + "subscribe" -> topic) + } + test(s"subscribing topic by pattern from latest offsets (failOnDataLoss: $failOnDataLoss)") { val topicPrefix = newTopic() val topic = topicPrefix + "-suffix" @@ -1270,6 +1346,17 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { failOnDataLoss = failOnDataLoss, "subscribePattern" -> s"$topicPrefix-.*") } + + test(s"subscribing topic by pattern from specific timestamps " + + s"(failOnDataLoss: $failOnDataLoss)") { + val topicPrefix = newTopic() + val topic = topicPrefix + "-suffix" + testFromSpecificTimestamps( + topic, + failOnDataLoss = failOnDataLoss, + addPartitions = true, + "subscribePattern" -> s"$topicPrefix-.*") + } } test("bad source options") { @@ -1289,6 +1376,9 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { // Specifying an ending offset testBadOptions("endingOffsets" -> "latest")("Ending offset not valid in streaming queries") + testBadOptions("subscribe" -> "t", "endingOffsetsByTimestamp" -> "{\"t\": {\"0\": 1000}}")( + "Ending timestamp not valid in streaming queries") + // No strategy specified testBadOptions()("options must be specified", "subscribe", "subscribePattern") @@ -1337,7 +1427,8 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { (STARTING_OFFSETS_OPTION_KEY, """{"topic-A":{"0":23}}""", SpecificOffsetRangeLimit(Map(new TopicPartition("topic-A", 0) -> 23))))) { val offset = getKafkaOffsetRangeLimit( - CaseInsensitiveMap[String](Map(optionKey -> optionValue)), optionKey, answer) + CaseInsensitiveMap[String](Map(optionKey -> optionValue)), "dummy", optionKey, + answer) assert(offset === answer) } @@ -1345,7 +1436,7 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { (STARTING_OFFSETS_OPTION_KEY, EarliestOffsetRangeLimit), 
(ENDING_OFFSETS_OPTION_KEY, LatestOffsetRangeLimit))) { val offset = getKafkaOffsetRangeLimit( - CaseInsensitiveMap[String](Map.empty), optionKey, answer) + CaseInsensitiveMap[String](Map.empty), "dummy", optionKey, answer) assert(offset === answer) } } @@ -1410,11 +1501,90 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { ) } + private def testFromSpecificTimestamps( + topic: String, + failOnDataLoss: Boolean, + addPartitions: Boolean, + options: (String, String)*): Unit = { + def sendMessages(topic: String, msgs: Seq[String], part: Int, ts: Long): Unit = { + val records = msgs.map { msg => + new RecordBuilder(topic, msg).partition(part).timestamp(ts).build() + } + testUtils.sendMessages(records) + } + + testUtils.createTopic(topic, partitions = 5) + + val firstTimestamp = System.currentTimeMillis() - 5000 + sendMessages(topic, Array(-20).map(_.toString), 0, firstTimestamp) + sendMessages(topic, Array(-10).map(_.toString), 1, firstTimestamp) + sendMessages(topic, Array(0, 1).map(_.toString), 2, firstTimestamp) + sendMessages(topic, Array(10, 11).map(_.toString), 3, firstTimestamp) + sendMessages(topic, Array(20, 21, 22).map(_.toString), 4, firstTimestamp) + + val secondTimestamp = firstTimestamp + 1000 + sendMessages(topic, Array(-21, -22).map(_.toString), 0, secondTimestamp) + sendMessages(topic, Array(-11, -12).map(_.toString), 1, secondTimestamp) + sendMessages(topic, Array(2).map(_.toString), 2, secondTimestamp) + sendMessages(topic, Array(12).map(_.toString), 3, secondTimestamp) + // no data after second timestamp for partition 4 + + require(testUtils.getLatestOffsets(Set(topic)).size === 5) + + // we intentionally starts from second timestamp, + // except for partition 4 - it starts from first timestamp + val startPartitionTimestamps: Map[TopicPartition, Long] = Map( + (0 to 3).map(new TopicPartition(topic, _) -> secondTimestamp): _* + ) ++ Map(new TopicPartition(topic, 4) -> firstTimestamp) + val startingTimestamps = JsonUtils.partitionTimestamps(startPartitionTimestamps) + + val reader = spark + .readStream + .format("kafka") + .option("startingOffsetsByTimestamp", startingTimestamps) + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("kafka.metadata.max.age.ms", "1") + .option("failOnDataLoss", failOnDataLoss.toString) + options.foreach { case (k, v) => reader.option(k, v) } + val kafka = reader.load() + .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") + .as[(String, String)] + val mapped: org.apache.spark.sql.Dataset[_] = kafka.map(kv => kv._2.toInt) + + testStream(mapped)( + makeSureGetOffsetCalled, + Execute { q => + val partitions = (0 to 4).map(new TopicPartition(topic, _)) + // wait to reach the last offset in every partition + q.awaitOffset( + 0, KafkaSourceOffset(partitions.map(tp => tp -> 3L).toMap), streamingTimeout.toMillis) + }, + CheckAnswer(-21, -22, -11, -12, 2, 12, 20, 21, 22), + StopStream, + StartStream(), + CheckAnswer(-21, -22, -11, -12, 2, 12, 20, 21, 22), // Should get the data back on recovery + StopStream, + AddKafkaData(Set(topic), 30, 31, 32), // Add data when stream is stopped + StartStream(), + CheckAnswer(-21, -22, -11, -12, 2, 12, 20, 21, 22, 30, 31, 32), // Should get the added data + AssertOnQuery("Add partitions") { query: StreamExecution => + if (addPartitions) setTopicPartitions(topic, 10, query) + true + }, + AddKafkaData(Set(topic), 40, 41, 42, 43, 44)(ensureDataInMultiplePartition = true), + CheckAnswer(-21, -22, -11, -12, 2, 12, 20, 21, 22, 30, 31, 32, 40, 41, 42, 43, 44), + StopStream + ) + } + 
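Editorial aside — a minimal sketch (not part of the patch) of the user-facing form that testFromSpecificTimestamps above exercises; the topic name, broker address, and epoch-millisecond values are placeholders. The JSON maps topic -> partition id -> timestamp in milliseconds, mirroring JsonUtils.partitionTimestamps, and each timestamp is resolved via the consumer's offsetsForTimes to the earliest offset whose record timestamp is greater than or equal to it:

    // Streaming read that starts each listed partition of "events" from records
    // with timestamps at or after the given epoch milliseconds.
    val startingByTimestamp = """{"events": {"0": 1568508285000, "1": 1568508285000}}"""
    val stream = spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "broker1:9092")
      .option("subscribe", "events")
      .option("startingOffsetsByTimestamp", startingByTimestamp)
      .load()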
test("Kafka column types") { val now = System.currentTimeMillis() val topic = newTopic() testUtils.createTopic(newTopic(), partitions = 1) - testUtils.sendMessages(topic, Array(1).map(_.toString)) + testUtils.sendMessage( + new RecordBuilder(topic, "1") + .headers(Seq(("a", "b".getBytes(UTF_8)), ("c", "d".getBytes(UTF_8)))).build() + ) val kafka = spark .readStream @@ -1423,6 +1593,7 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { .option("kafka.metadata.max.age.ms", "1") .option("startingOffsets", s"earliest") .option("subscribe", topic) + .option("includeHeaders", "true") .load() val query = kafka @@ -1445,6 +1616,21 @@ abstract class KafkaSourceSuiteBase extends KafkaSourceTest { // producer. So here we just use a low bound to make sure the internal conversion works. assert(row.getAs[java.sql.Timestamp]("timestamp").getTime >= now, s"Unexpected results: $row") assert(row.getAs[Int]("timestampType") === 0, s"Unexpected results: $row") + + def checkHeader(row: Row, expected: Seq[(String, Array[Byte])]): Unit = { + // array> + val headers = row.getList[Row](row.fieldIndex("headers")).asScala + assert(headers.length === expected.length) + + (0 until expected.length).foreach { idx => + val key = headers(idx).getAs[String]("key") + val value = headers(idx).getAs[Array[Byte]]("value") + assert(key === expected(idx)._1) + assert(value === expected(idx)._2) + } + } + + checkHeader(row, Seq(("a", "b".getBytes(UTF_8)), ("c", "d".getBytes(UTF_8)))) query.stop() } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala index b4e1b78c7db4e..063e2e2bc8b77 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaRelationSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.sql.kafka010 +import java.nio.charset.StandardCharsets.UTF_8 import java.util.Locale import java.util.concurrent.atomic.AtomicInteger +import scala.annotation.tailrec import scala.collection.JavaConverters._ import scala.util.Random @@ -27,11 +29,11 @@ import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkConf -import org.apache.spark.sql.QueryTest +import org.apache.spark.SparkException +import org.apache.spark.sql.{DataFrameReader, QueryTest} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -70,7 +72,8 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession protected def createDF( topic: String, withOptions: Map[String, String] = Map.empty[String, String], - brokerAddress: Option[String] = None) = { + brokerAddress: Option[String] = None, + includeHeaders: Boolean = false) = { val df = spark .read .format("kafka") @@ -80,7 +83,13 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession withOptions.foreach { case (key, value) => df.option(key, value) } - df.load().selectExpr("CAST(value AS STRING)") + if (includeHeaders) { + df.option("includeHeaders", "true") + df.load() + .selectExpr("CAST(value AS STRING)", "headers") + } else { + 
df.load().selectExpr("CAST(value AS STRING)") + } } test("explicit earliest to latest offsets") { @@ -147,6 +156,214 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession checkAnswer(df, (0 to 30).map(_.toString).toDF) } + test("default starting and ending offsets with headers") { + val topic = newTopic() + testUtils.createTopic(topic, partitions = 3) + testUtils.sendMessage( + new RecordBuilder(topic, "1").headers(Seq()).partition(0).build() + ) + testUtils.sendMessage( + new RecordBuilder(topic, "2").headers( + Seq(("a", "b".getBytes(UTF_8)), ("c", "d".getBytes(UTF_8)))).partition(1).build() + ) + testUtils.sendMessage( + new RecordBuilder(topic, "3").headers( + Seq(("e", "f".getBytes(UTF_8)), ("e", "g".getBytes(UTF_8)))).partition(2).build() + ) + + // Implicit offset values, should default to earliest and latest + val df = createDF(topic, includeHeaders = true) + // Test that we default to "earliest" and "latest" + checkAnswer(df, Seq(("1", null), + ("2", Seq(("a", "b".getBytes(UTF_8)), ("c", "d".getBytes(UTF_8)))), + ("3", Seq(("e", "f".getBytes(UTF_8)), ("e", "g".getBytes(UTF_8))))).toDF) + } + + test("timestamp provided for starting and ending") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // timestamp both presented: starting "first" ending "finalized" + verifyTimestampRelatedQueryResult({ df => + val startPartitionTimestamps: Map[TopicPartition, Long] = Map( + (0 to 2).map(new TopicPartition(topic, _) -> timestamps(1)): _*) + val startingTimestamps = JsonUtils.partitionTimestamps(startPartitionTimestamps) + + val endPartitionTimestamps = Map( + (0 to 2).map(new TopicPartition(topic, _) -> timestamps(2)): _*) + val endingTimestamps = JsonUtils.partitionTimestamps(endPartitionTimestamps) + + df.option("startingOffsetsByTimestamp", startingTimestamps) + .option("endingOffsetsByTimestamp", endingTimestamps) + }, topic, 10 to 19) + } + + test("timestamp provided for starting, offset provided for ending") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // starting only presented as "first", and ending presented as endingOffsets + verifyTimestampRelatedQueryResult({ df => + val startTopicTimestamps = Map( + (0 to 2).map(new TopicPartition(topic, _) -> timestamps.head): _*) + val startingTimestamps = JsonUtils.partitionTimestamps(startTopicTimestamps) + + val endPartitionOffsets = Map( + new TopicPartition(topic, 0) -> -1L, // -1 => latest + new TopicPartition(topic, 1) -> -1L, + new TopicPartition(topic, 2) -> 1L // explicit offset - take only first one + ) + val endingOffsets = JsonUtils.partitionOffsets(endPartitionOffsets) + + // so we here expect full of records from partition 0 and 1, and only the first record + // from partition 2 which is "2" + + df.option("startingOffsetsByTimestamp", startingTimestamps) + .option("endingOffsets", endingOffsets) + }, topic, (0 to 29).filterNot(_ % 3 == 2) ++ Seq(2)) + } + + test("timestamp provided for ending, offset provided for starting") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // ending only presented as "third", and starting presented as startingOffsets + verifyTimestampRelatedQueryResult({ df => + val startPartitionOffsets = Map( + new TopicPartition(topic, 0) -> -2L, // -2 => earliest + new TopicPartition(topic, 1) -> -2L, + new TopicPartition(topic, 2) -> 0L // explicit earliest + ) + val startingOffsets = JsonUtils.partitionOffsets(startPartitionOffsets) + + val endTopicTimestamps = Map( + (0 to 2).map(new TopicPartition(topic, _) -> 
timestamps(2)): _*) + val endingTimestamps = JsonUtils.partitionTimestamps(endTopicTimestamps) + + df.option("startingOffsets", startingOffsets) + .option("endingOffsetsByTimestamp", endingTimestamps) + }, topic, 0 to 19) + } + + test("timestamp provided for starting, ending not provided") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // starting only presented as "second", and ending not presented + verifyTimestampRelatedQueryResult({ df => + val startTopicTimestamps = Map( + (0 to 2).map(new TopicPartition(topic, _) -> timestamps(1)): _*) + val startingTimestamps = JsonUtils.partitionTimestamps(startTopicTimestamps) + + df.option("startingOffsetsByTimestamp", startingTimestamps) + }, topic, 10 to 29) + } + + test("timestamp provided for ending, starting not provided") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // ending only presented as "third", and starting not presented + verifyTimestampRelatedQueryResult({ df => + val endTopicTimestamps = Map( + (0 to 2).map(new TopicPartition(topic, _) -> timestamps(2)): _*) + val endingTimestamps = JsonUtils.partitionTimestamps(endTopicTimestamps) + + df.option("endingOffsetsByTimestamp", endingTimestamps) + }, topic, 0 to 19) + } + + test("no matched offset for timestamp - startingOffsets") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + val e = intercept[SparkException] { + verifyTimestampRelatedQueryResult({ df => + // partition 2 will make query fail + val startTopicTimestamps = Map( + (0 to 1).map(new TopicPartition(topic, _) -> timestamps(1)): _*) ++ + Map(new TopicPartition(topic, 2) -> Long.MaxValue) + + val startingTimestamps = JsonUtils.partitionTimestamps(startTopicTimestamps) + + df.option("startingOffsetsByTimestamp", startingTimestamps) + }, topic, Seq.empty) + } + + @tailrec + def assertionErrorInExceptionChain(e: Throwable): Boolean = { + if (e.isInstanceOf[AssertionError]) { + true + } else if (e.getCause == null) { + false + } else { + assertionErrorInExceptionChain(e.getCause) + } + } + + assert(assertionErrorInExceptionChain(e), + "Cannot find expected AssertionError in chained exceptions") + } + + test("no matched offset for timestamp - endingOffsets") { + val (topic, timestamps) = prepareTimestampRelatedUnitTest + + // the query will run fine, since we allow no matching offset for timestamp + // if it's endingOffsets + // for partition 0 and 1, it only takes records between first and second timestamp + // for partition 2, it will take all records + verifyTimestampRelatedQueryResult({ df => + val endTopicTimestamps = Map( + (0 to 1).map(new TopicPartition(topic, _) -> timestamps(1)): _*) ++ + Map(new TopicPartition(topic, 2) -> Long.MaxValue) + + val endingTimestamps = JsonUtils.partitionTimestamps(endTopicTimestamps) + + df.option("endingOffsetsByTimestamp", endingTimestamps) + }, topic, (0 to 9) ++ (10 to 29).filter(_ % 3 == 2)) + } + + private def prepareTimestampRelatedUnitTest: (String, Seq[Long]) = { + val topic = newTopic() + testUtils.createTopic(topic, partitions = 3) + + def sendMessages(topic: String, msgs: Array[String], part: Int, ts: Long): Unit = { + val records = msgs.map { msg => + new RecordBuilder(topic, msg).partition(part).timestamp(ts).build() + } + testUtils.sendMessages(records) + } + + val firstTimestamp = System.currentTimeMillis() - 5000 + (0 to 2).foreach { partNum => + sendMessages(topic, (0 to 9).filter(_ % 3 == partNum) + .map(_.toString).toArray, partNum, firstTimestamp) + } + + val secondTimestamp = firstTimestamp + 1000 + (0 to 
2).foreach { partNum => + sendMessages(topic, (10 to 19).filter(_ % 3 == partNum) + .map(_.toString).toArray, partNum, secondTimestamp) + } + + val thirdTimestamp = secondTimestamp + 1000 + (0 to 2).foreach { partNum => + sendMessages(topic, (20 to 29).filter(_ % 3 == partNum) + .map(_.toString).toArray, partNum, thirdTimestamp) + } + + val finalizedTimestamp = thirdTimestamp + 1000 + + (topic, Seq(firstTimestamp, secondTimestamp, thirdTimestamp, finalizedTimestamp)) + } + + private def verifyTimestampRelatedQueryResult( + optionFn: DataFrameReader => DataFrameReader, + topic: String, + expectation: Seq[Int]): Unit = { + val df = spark.read + .format("kafka") + .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .option("subscribe", topic) + + val df2 = optionFn(df).load().selectExpr("CAST(value AS STRING)") + checkAnswer(df2, expectation.map(_.toString).toDF) + } + test("reuse same dataframe in query") { // This test ensures that we do not cache the Kafka Consumer in KafkaRelation val topic = newTopic() @@ -263,7 +480,8 @@ abstract class KafkaRelationSuiteBase extends QueryTest with SharedSparkSession }) } - private def testGroupId(groupIdKey: String, validateGroupId: (String, Iterable[String]) => Unit) { + private def testGroupId(groupIdKey: String, + validateGroupId: (String, Iterable[String]) => Unit): Unit = { // Tests code path KafkaSourceProvider.createRelation(.) val topic = newTopic() testUtils.createTopic(topic, partitions = 3) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala index 84ad41610cccd..d77b9a3b6a9e1 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSinkSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.kafka010 +import java.nio.charset.StandardCharsets.UTF_8 import java.util.Locale import java.util.concurrent.atomic.AtomicInteger @@ -32,7 +33,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming._ import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{BinaryType, DataType} +import org.apache.spark.sql.types.{BinaryType, DataType, StringType, StructField, StructType} abstract class KafkaSinkSuiteBase extends QueryTest with SharedSparkSession with KafkaTest { protected var testUtils: KafkaTestUtils = _ @@ -59,13 +60,14 @@ abstract class KafkaSinkSuiteBase extends QueryTest with SharedSparkSession with protected def newTopic(): String = s"topic-${topicId.getAndIncrement()}" - protected def createKafkaReader(topic: String): DataFrame = { + protected def createKafkaReader(topic: String, includeHeaders: Boolean = false): DataFrame = { spark.read .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("startingOffsets", "earliest") .option("endingOffsets", "latest") .option("subscribe", topic) + .option("includeHeaders", includeHeaders.toString) .load() } } @@ -368,15 +370,52 @@ abstract class KafkaSinkBatchSuiteBase extends KafkaSinkSuiteBase { test("batch - write to kafka") { val topic = newTopic() testUtils.createTopic(topic) - val df = Seq("1", "2", "3", "4", "5").map(v => (topic, v)).toDF("topic", "value") + val data = Seq( + Row(topic, "1", Seq( + Row("a", "b".getBytes(UTF_8)) + )), + Row(topic, "2", Seq( + Row("c", "d".getBytes(UTF_8)), + 
Row("e", "f".getBytes(UTF_8)) + )), + Row(topic, "3", Seq( + Row("g", "h".getBytes(UTF_8)), + Row("g", "i".getBytes(UTF_8)) + )), + Row(topic, "4", null), + Row(topic, "5", Seq( + Row("j", "k".getBytes(UTF_8)), + Row("j", "l".getBytes(UTF_8)), + Row("m", "n".getBytes(UTF_8)) + )) + ) + + val df = spark.createDataFrame( + spark.sparkContext.parallelize(data), + StructType(Seq(StructField("topic", StringType), StructField("value", StringType), + StructField("headers", KafkaRecordToRowConverter.headersType))) + ) + df.write .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("topic", topic) + .mode("append") .save() checkAnswer( - createKafkaReader(topic).selectExpr("CAST(value as STRING) value"), - Row("1") :: Row("2") :: Row("3") :: Row("4") :: Row("5") :: Nil) + createKafkaReader(topic, includeHeaders = true).selectExpr( + "CAST(value as STRING) value", "headers" + ), + Row("1", Seq(Row("a", "b".getBytes(UTF_8)))) :: + Row("2", Seq(Row("c", "d".getBytes(UTF_8)), Row("e", "f".getBytes(UTF_8)))) :: + Row("3", Seq(Row("g", "h".getBytes(UTF_8)), Row("g", "i".getBytes(UTF_8)))) :: + Row("4", null) :: + Row("5", Seq( + Row("j", "k".getBytes(UTF_8)), + Row("j", "l".getBytes(UTF_8)), + Row("m", "n".getBytes(UTF_8)))) :: + Nil + ) } test("batch - null topic field value, and no topic option") { @@ -385,12 +424,13 @@ abstract class KafkaSinkBatchSuiteBase extends KafkaSinkSuiteBase { df.write .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) + .mode("append") .save() } TestUtils.assertExceptionMsg(ex, "null topic present in the data") } - protected def testUnsupportedSaveModes(msg: (SaveMode) => String) { + protected def testUnsupportedSaveModes(msg: (SaveMode) => String): Unit = { val topic = newTopic() testUtils.createTopic(topic) val df = Seq[(String, String)](null.asInstanceOf[String] -> "1").toDF("topic", "value") @@ -419,6 +459,7 @@ abstract class KafkaSinkBatchSuiteBase extends KafkaSinkSuiteBase { .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("topic", topic) + .mode("append") .save() } } diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceProviderSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceProviderSuite.scala index 8e6de88865e06..f7b00b31ebba0 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceProviderSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaSourceProviderSuite.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import org.mockito.Mockito.{mock, when} import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} -import org.apache.spark.sql.sources.v2.reader.Scan +import org.apache.spark.sql.connector.read.Scan import org.apache.spark.sql.util.CaseInsensitiveStringMap class KafkaSourceProviderSuite extends SparkFunSuite { diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index d7cb30f530396..bbb72bf9973e3 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.kafka010 import java.io.{File, IOException} import java.lang.{Integer => JInt} -import java.net.InetSocketAddress 
+import java.net.{InetAddress, InetSocketAddress} import java.nio.charset.StandardCharsets import java.util.{Collections, Map => JMap, Properties, UUID} import java.util.concurrent.TimeUnit @@ -41,6 +41,8 @@ import org.apache.kafka.clients.consumer.KafkaConsumer import org.apache.kafka.clients.producer._ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.config.SaslConfigs +import org.apache.kafka.common.header.Header +import org.apache.kafka.common.header.internals.RecordHeader import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} @@ -66,10 +68,13 @@ class KafkaTestUtils( private val JAVA_AUTH_CONFIG = "java.security.auth.login.config" + private val localCanonicalHostName = InetAddress.getLoopbackAddress().getCanonicalHostName() + logInfo(s"Local host name is $localCanonicalHostName") + private var kdc: MiniKdc = _ // Zookeeper related configurations - private val zkHost = "localhost" + private val zkHost = localCanonicalHostName private var zkPort: Int = 0 private val zkConnectionTimeout = 60000 private val zkSessionTimeout = 10000 @@ -78,12 +83,12 @@ class KafkaTestUtils( private var zkUtils: ZkUtils = _ // Kafka broker related configurations - private val brokerHost = "localhost" + private val brokerHost = localCanonicalHostName private var brokerPort = 0 private var brokerConf: KafkaConfig = _ private val brokerServiceName = "kafka" - private val clientUser = "client/localhost" + private val clientUser = s"client/$localCanonicalHostName" private var clientKeytabFile: File = _ // Kafka broker server @@ -137,17 +142,17 @@ class KafkaTestUtils( assert(kdcReady, "KDC should be set up beforehand") val baseDir = Utils.createTempDir() - val zkServerUser = "zookeeper/localhost" + val zkServerUser = s"zookeeper/$localCanonicalHostName" val zkServerKeytabFile = new File(baseDir, "zookeeper.keytab") kdc.createPrincipal(zkServerKeytabFile, zkServerUser) logDebug(s"Created keytab file: ${zkServerKeytabFile.getAbsolutePath()}") - val zkClientUser = "zkclient/localhost" + val zkClientUser = s"zkclient/$localCanonicalHostName" val zkClientKeytabFile = new File(baseDir, "zkclient.keytab") kdc.createPrincipal(zkClientKeytabFile, zkClientUser) logDebug(s"Created keytab file: ${zkClientKeytabFile.getAbsolutePath()}") - val kafkaServerUser = "kafka/localhost" + val kafkaServerUser = s"kafka/$localCanonicalHostName" val kafkaServerKeytabFile = new File(baseDir, "kafka.keytab") kdc.createPrincipal(kafkaServerKeytabFile, kafkaServerUser) logDebug(s"Created keytab file: ${kafkaServerKeytabFile.getAbsolutePath()}") @@ -348,38 +353,33 @@ class KafkaTestUtils( } } - /** Java-friendly function for sending messages to the Kafka broker */ - def sendMessages(topic: String, messageToFreq: JMap[String, JInt]): Unit = { - sendMessages(topic, Map(messageToFreq.asScala.mapValues(_.intValue()).toSeq: _*)) + def sendMessages(topic: String, msgs: Array[String]): Seq[(String, RecordMetadata)] = { + sendMessages(topic, msgs, None) } - /** Send the messages to the Kafka broker */ - def sendMessages(topic: String, messageToFreq: Map[String, Int]): Unit = { - val messages = messageToFreq.flatMap { case (s, freq) => Seq.fill(freq)(s) }.toArray - sendMessages(topic, messages) + def sendMessages( + topic: String, + msgs: Array[String], + part: Option[Int]): Seq[(String, RecordMetadata)] = { + val records = msgs.map { msg => + val builder 
= new RecordBuilder(topic, msg) + part.foreach { p => builder.partition(p) } + builder.build() + } + sendMessages(records) } - /** Send the array of messages to the Kafka broker */ - def sendMessages(topic: String, messages: Array[String]): Seq[(String, RecordMetadata)] = { - sendMessages(topic, messages, None) + def sendMessage(msg: ProducerRecord[String, String]): Seq[(String, RecordMetadata)] = { + sendMessages(Array(msg)) } - /** Send the array of messages to the Kafka broker using specified partition */ - def sendMessages( - topic: String, - messages: Array[String], - partition: Option[Int]): Seq[(String, RecordMetadata)] = { + def sendMessages(msgs: Seq[ProducerRecord[String, String]]): Seq[(String, RecordMetadata)] = { producer = new KafkaProducer[String, String](producerConfiguration) val offsets = try { - messages.map { m => - val record = partition match { - case Some(p) => new ProducerRecord[String, String](topic, p, null, m) - case None => new ProducerRecord[String, String](topic, m) - } - val metadata = - producer.send(record).get(10, TimeUnit.SECONDS) - logInfo(s"\tSent $m to partition ${metadata.partition}, offset ${metadata.offset}") - (m, metadata) + msgs.map { msg => + val metadata = producer.send(msg).get(10, TimeUnit.SECONDS) + logInfo(s"\tSent ($msg) to partition ${metadata.partition}, offset ${metadata.offset}") + (msg.value(), metadata) } } finally { if (producer != null) { @@ -550,7 +550,7 @@ class KafkaTestUtils( zkUtils: ZkUtils, topic: String, numPartitions: Int, - servers: Seq[KafkaServer]) { + servers: Seq[KafkaServer]): Unit = { eventually(timeout(1.minute), interval(200.milliseconds)) { try { verifyTopicDeletion(topic, numPartitions, servers) @@ -613,7 +613,7 @@ class KafkaTestUtils( val actualPort = factory.getLocalPort - def shutdown() { + def shutdown(): Unit = { factory.shutdown() // The directories are not closed even if the ZooKeeper server is shut down. // Please see ZOOKEEPER-1844, which is fixed in 3.4.6+. It leads to test failures @@ -634,4 +634,3 @@ class KafkaTestUtils( } } } - diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/RecordBuilder.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/RecordBuilder.scala new file mode 100644 index 0000000000000..ef07798442e56 --- /dev/null +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/RecordBuilder.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.kafka010 + +import java.lang.{Integer => JInt, Long => JLong} + +import scala.collection.JavaConverters._ + +import org.apache.kafka.clients.producer.ProducerRecord +import org.apache.kafka.common.header.Header +import org.apache.kafka.common.header.internals.RecordHeader + +class RecordBuilder(topic: String, value: String) { + var _partition: Option[JInt] = None + var _timestamp: Option[JLong] = None + var _key: Option[String] = None + var _headers: Option[Seq[(String, Array[Byte])]] = None + + def partition(part: JInt): RecordBuilder = { + _partition = Some(part) + this + } + + def partition(part: Int): RecordBuilder = { + _partition = Some(part.intValue()) + this + } + + def timestamp(ts: JLong): RecordBuilder = { + _timestamp = Some(ts) + this + } + + def timestamp(ts: Long): RecordBuilder = { + _timestamp = Some(ts.longValue()) + this + } + + def key(k: String): RecordBuilder = { + _key = Some(k) + this + } + + def headers(hdrs: Seq[(String, Array[Byte])]): RecordBuilder = { + _headers = Some(hdrs) + this + } + + def build(): ProducerRecord[String, String] = { + val part = _partition.orNull + val ts = _timestamp.orNull + val k = _key.orNull + val hdrs = _headers.map { h => + h.map { case (k, v) => new RecordHeader(k, v).asInstanceOf[Header] } + }.map(_.asJava).orNull + + new ProducerRecord[String, String](topic, part, ts, k, value, hdrs) + } +} diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaConfigUpdater.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaConfigUpdater.scala index 0c61045d6d487..f54ff0d146f7a 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaConfigUpdater.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaConfigUpdater.scala @@ -57,6 +57,12 @@ private[spark] case class KafkaConfigUpdater(module: String, kafkaParams: Map[St } def setAuthenticationConfigIfNeeded(): this.type = { + val clusterConfig = KafkaTokenUtil.findMatchingTokenClusterConfig(SparkEnv.get.conf, + kafkaParams(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG).asInstanceOf[String]) + setAuthenticationConfigIfNeeded(clusterConfig) + } + + def setAuthenticationConfigIfNeeded(clusterConfig: Option[KafkaTokenClusterConf]): this.type = { // There are multiple possibilities to log in and applied in the following order: // - JVM global security provided -> try to log in with JVM global security configuration // which can be configured for example with 'java.security.auth.login.config'. 
@@ -66,10 +72,9 @@ private[spark] case class KafkaConfigUpdater(module: String, kafkaParams: Map[St if (KafkaTokenUtil.isGlobalJaasConfigurationProvided) { logDebug("JVM global security configuration detected, using it for login.") } else { - val clusterConfig = KafkaTokenUtil.findMatchingToken(SparkEnv.get.conf, - map.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG).asInstanceOf[String]) clusterConfig.foreach { clusterConf => logDebug("Delegation token detected, using it for login.") + setIfUnset(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, clusterConf.securityProtocol) val jaasParams = KafkaTokenUtil.getTokenJaasParams(clusterConf) set(SaslConfigs.SASL_JAAS_CONFIG, jaasParams) require(clusterConf.tokenMechanism.startsWith("SCRAM"), diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala index e1f3c800a51f8..ed4a6f1e34c55 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenSparkConf.scala @@ -57,6 +57,7 @@ private [kafka010] object KafkaTokenSparkConf extends Logging { val CLUSTERS_CONFIG_PREFIX = "spark.kafka.clusters." val DEFAULT_TARGET_SERVERS_REGEX = ".*" val DEFAULT_SASL_KERBEROS_SERVICE_NAME = "kafka" + val DEFAULT_SECURITY_PROTOCOL_CONFIG = SASL_SSL.name val DEFAULT_SASL_TOKEN_MECHANISM = "SCRAM-SHA-512" def getClusterConfig(sparkConf: SparkConf, identifier: String): KafkaTokenClusterConf = { @@ -72,7 +73,8 @@ private [kafka010] object KafkaTokenSparkConf extends Logging { s"${configPrefix}auth.${CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG}")), sparkClusterConf.getOrElse(s"target.${CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG}.regex", KafkaTokenSparkConf.DEFAULT_TARGET_SERVERS_REGEX), - sparkClusterConf.getOrElse(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, SASL_SSL.name), + sparkClusterConf.getOrElse(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, + DEFAULT_SECURITY_PROTOCOL_CONFIG), sparkClusterConf.getOrElse(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, KafkaTokenSparkConf.DEFAULT_SASL_KERBEROS_SERVICE_NAME), sparkClusterConf.get(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG), diff --git a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala index 39e3ac74a9aeb..0ebe98330b4ae 100644 --- a/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala +++ b/external/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaTokenUtil.scala @@ -36,7 +36,7 @@ import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, S import org.apache.kafka.common.security.scram.ScramLoginModule import org.apache.kafka.common.security.token.delegation.DelegationToken -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -241,8 +241,8 @@ private[spark] object KafkaTokenUtil extends Logging { "TOKENID", "HMAC", "OWNER", "RENEWERS", "ISSUEDATE", "EXPIRYDATE", "MAXDATE")) val tokenInfo = token.tokenInfo logDebug("%-15s %-15s %-15s %-25s %-15s %-15s %-15s".format( - REDACTION_REPLACEMENT_TEXT, tokenInfo.tokenId, + 
REDACTION_REPLACEMENT_TEXT, tokenInfo.owner, tokenInfo.renewersAsString, dateFormat.format(tokenInfo.issueTimestamp), @@ -251,7 +251,7 @@ private[spark] object KafkaTokenUtil extends Logging { } } - def findMatchingToken( + def findMatchingTokenClusterConfig( sparkConf: SparkConf, bootStrapServers: String): Option[KafkaTokenClusterConf] = { val tokens = UserGroupInformation.getCurrentUser().getCredentials.getAllTokens.asScala @@ -272,6 +272,7 @@ private[spark] object KafkaTokenUtil extends Logging { def getTokenJaasParams(clusterConf: KafkaTokenClusterConf): String = { val token = UserGroupInformation.getCurrentUser().getCredentials.getToken( getTokenService(clusterConf.identifier)) + require(token != null, s"Token for identifier ${clusterConf.identifier} must exist") val username = new String(token.getIdentifier) val password = new String(token.getPassword) @@ -288,4 +289,17 @@ private[spark] object KafkaTokenUtil extends Logging { params } + + def isConnectorUsingCurrentToken( + params: ju.Map[String, Object], + clusterConfig: Option[KafkaTokenClusterConf]): Boolean = { + if (params.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) { + logDebug("Delegation token used by connector, checking if uses the latest token.") + val consumerJaasParams = params.get(SaslConfigs.SASL_JAAS_CONFIG).asInstanceOf[String] + require(clusterConfig.isDefined, "Delegation token must exist for this connector.") + getTokenJaasParams(clusterConfig.get) == consumerJaasParams + } else { + true + } + } } diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaConfigUpdaterSuite.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaConfigUpdaterSuite.scala index 7a172892e778c..dc1e7cb8d979e 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaConfigUpdaterSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaConfigUpdaterSuite.scala @@ -17,8 +17,13 @@ package org.apache.spark.kafka010 +import java.{util => ju} + +import scala.collection.JavaConverters._ + import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.common.config.SaslConfigs +import org.apache.kafka.common.security.auth.SecurityProtocol.SASL_PLAINTEXT import org.apache.spark.SparkFunSuite @@ -62,36 +67,64 @@ class KafkaConfigUpdaterSuite extends SparkFunSuite with KafkaDelegationTokenTes } test("setAuthenticationConfigIfNeeded with global security should not set values") { - val params = Map.empty[String, String] + val params = Map( + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> bootStrapServers + ) + setSparkEnv( + Map( + s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers + ) + ) setGlobalKafkaClientConfig() val updatedParams = KafkaConfigUpdater(testModule, params) .setAuthenticationConfigIfNeeded() .build() - assert(updatedParams.size() === 0) + assert(updatedParams.asScala === params) } test("setAuthenticationConfigIfNeeded with token should set values") { val params = Map( CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> bootStrapServers ) + testWithTokenSetValues(params) { updatedParams => + assert(updatedParams.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === + KafkaTokenSparkConf.DEFAULT_SECURITY_PROTOCOL_CONFIG) + } + } + + test("setAuthenticationConfigIfNeeded with token should not override user-defined protocol") { + val overrideProtocolName = SASL_PLAINTEXT.name + val params = Map( + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> 
bootStrapServers, + CommonClientConfigs.SECURITY_PROTOCOL_CONFIG -> overrideProtocolName + ) + testWithTokenSetValues(params) { updatedParams => + assert(updatedParams.get(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG) === + overrideProtocolName) + } + } + + def testWithTokenSetValues(params: Map[String, String]) + (validate: (ju.Map[String, Object]) => Unit): Unit = { setSparkEnv( Map( s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers ) ) - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) val updatedParams = KafkaConfigUpdater(testModule, params) .setAuthenticationConfigIfNeeded() .build() - assert(updatedParams.size() === 3) + assert(updatedParams.size() === 4) assert(updatedParams.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG) === bootStrapServers) assert(updatedParams.containsKey(SaslConfigs.SASL_JAAS_CONFIG)) assert(updatedParams.get(SaslConfigs.SASL_MECHANISM) === KafkaTokenSparkConf.DEFAULT_SASL_TOKEN_MECHANISM) + validate(updatedParams) } test("setAuthenticationConfigIfNeeded with invalid mechanism should throw exception") { @@ -104,7 +137,7 @@ class KafkaConfigUpdaterSuite extends SparkFunSuite with KafkaDelegationTokenTes s"spark.kafka.clusters.$identifier1.sasl.token.mechanism" -> "intentionally_invalid" ) ) - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) val e = intercept[IllegalArgumentException] { KafkaConfigUpdater(testModule, params) diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala index eebbf96afa470..19335f4221e40 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaDelegationTokenTest.scala @@ -37,8 +37,12 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { private def doReturn(value: Any) = org.mockito.Mockito.doReturn(value, Seq.empty: _*) - protected val tokenId = "tokenId" + ju.UUID.randomUUID().toString - protected val tokenPassword = "tokenPassword" + ju.UUID.randomUUID().toString + private var savedSparkEnv: SparkEnv = _ + + protected val tokenId1 = "tokenId" + ju.UUID.randomUUID().toString + protected val tokenPassword1 = "tokenPassword" + ju.UUID.randomUUID().toString + protected val tokenId2 = "tokenId" + ju.UUID.randomUUID().toString + protected val tokenPassword2 = "tokenPassword" + ju.UUID.randomUUID().toString protected val identifier1 = "cluster1" protected val identifier2 = "cluster2" @@ -72,11 +76,16 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { } } + override def beforeEach(): Unit = { + super.beforeEach() + savedSparkEnv = SparkEnv.get + } + override def afterEach(): Unit = { try { Configuration.setConfiguration(null) - UserGroupInformation.setLoginUser(null) - SparkEnv.set(null) + UserGroupInformation.reset() + SparkEnv.set(savedSparkEnv) } finally { super.afterEach() } @@ -86,7 +95,7 @@ trait KafkaDelegationTokenTest extends BeforeAndAfterEach { Configuration.setConfiguration(new KafkaJaasConfiguration) } - protected def addTokenToUGI(tokenService: Text): Unit = { + protected def addTokenToUGI(tokenService: Text, tokenId: String, tokenPassword: String): Unit = { val token = new Token[KafkaDelegationTokenIdentifier]( tokenId.getBytes, tokenPassword.getBytes, diff --git 
a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaRedactionUtilSuite.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaRedactionUtilSuite.scala index 42a9fb5567b6f..225afbe5f3649 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaRedactionUtilSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaRedactionUtilSuite.scala @@ -68,7 +68,7 @@ class KafkaRedactionUtilSuite extends SparkFunSuite with KafkaDelegationTokenTes test("redactParams should redact token password from parameters") { setSparkEnv(Map.empty) val groupId = "id-" + ju.UUID.randomUUID().toString - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) val clusterConf = createClusterConf(identifier1, SASL_SSL.name) val jaasParams = KafkaTokenUtil.getTokenJaasParams(clusterConf) val kafkaParams = Seq( @@ -81,8 +81,8 @@ class KafkaRedactionUtilSuite extends SparkFunSuite with KafkaDelegationTokenTes assert(redactedParams.size === 2) assert(redactedParams.get(ConsumerConfig.GROUP_ID_CONFIG).get === groupId) val redactedJaasParams = redactedParams.get(SaslConfigs.SASL_JAAS_CONFIG).get - assert(redactedJaasParams.contains(tokenId)) - assert(!redactedJaasParams.contains(tokenPassword)) + assert(redactedJaasParams.contains(tokenId1)) + assert(!redactedJaasParams.contains(tokenPassword1)) } test("redactParams should redact passwords from parameters") { @@ -113,13 +113,13 @@ class KafkaRedactionUtilSuite extends SparkFunSuite with KafkaDelegationTokenTes } test("redactJaasParam should redact token password") { - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) val clusterConf = createClusterConf(identifier1, SASL_SSL.name) val jaasParams = KafkaTokenUtil.getTokenJaasParams(clusterConf) val redactedJaasParams = KafkaRedactionUtil.redactJaasParam(jaasParams) - assert(redactedJaasParams.contains(tokenId)) - assert(!redactedJaasParams.contains(tokenPassword)) + assert(redactedJaasParams.contains(tokenId1)) + assert(!redactedJaasParams.contains(tokenPassword1)) } } diff --git a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala index 5496195b41490..6fa1b56bff977 100644 --- a/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala +++ b/external/kafka-0-10-token-provider/src/test/scala/org/apache/spark/kafka010/KafkaTokenUtilSuite.scala @@ -17,15 +17,18 @@ package org.apache.spark.kafka010 +import java.{util => ju} import java.security.PrivilegedExceptionAction +import scala.collection.JavaConverters._ + import org.apache.hadoop.io.Text import org.apache.hadoop.security.UserGroupInformation import org.apache.kafka.clients.CommonClientConfigs import org.apache.kafka.common.config.{SaslConfigs, SslConfigs} import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, SASL_SSL, SSL} -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkEnv, SparkFunSuite} import org.apache.spark.internal.config._ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { @@ -174,58 +177,102 @@ class KafkaTokenUtilSuite extends SparkFunSuite with KafkaDelegationTokenTest { assert(KafkaTokenUtil.isGlobalJaasConfigurationProvided) } - test("findMatchingToken without 
token should return None") { - assert(KafkaTokenUtil.findMatchingToken(sparkConf, bootStrapServers) === None) + test("findMatchingTokenClusterConfig without token should return None") { + assert(KafkaTokenUtil.findMatchingTokenClusterConfig(sparkConf, bootStrapServers) === None) } - test("findMatchingToken with non-matching tokens should return None") { + test("findMatchingTokenClusterConfig with non-matching tokens should return None") { sparkConf.set(s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers", bootStrapServers) sparkConf.set(s"spark.kafka.clusters.$identifier1.target.bootstrap.servers.regex", nonMatchingTargetServersRegex) sparkConf.set(s"spark.kafka.clusters.$identifier2.bootstrap.servers", bootStrapServers) sparkConf.set(s"spark.kafka.clusters.$identifier2.target.bootstrap.servers.regex", matchingTargetServersRegex) - addTokenToUGI(tokenService1) - addTokenToUGI(new Text("intentionally_garbage")) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + addTokenToUGI(new Text("intentionally_garbage"), tokenId1, tokenPassword1) - assert(KafkaTokenUtil.findMatchingToken(sparkConf, bootStrapServers) === None) + assert(KafkaTokenUtil.findMatchingTokenClusterConfig(sparkConf, bootStrapServers) === None) } - test("findMatchingToken with one matching token should return cluster configuration") { + test("findMatchingTokenClusterConfig with one matching token should return token and cluster " + + "configuration") { sparkConf.set(s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers", bootStrapServers) sparkConf.set(s"spark.kafka.clusters.$identifier1.target.bootstrap.servers.regex", matchingTargetServersRegex) - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) - assert(KafkaTokenUtil.findMatchingToken(sparkConf, bootStrapServers) === - Some(KafkaTokenSparkConf.getClusterConfig(sparkConf, identifier1))) + val clusterConfig = KafkaTokenUtil.findMatchingTokenClusterConfig(sparkConf, bootStrapServers) + assert(clusterConfig.get === KafkaTokenSparkConf.getClusterConfig(sparkConf, identifier1)) } - test("findMatchingToken with multiple matching tokens should throw exception") { + test("findMatchingTokenClusterConfig with multiple matching tokens should throw exception") { sparkConf.set(s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers", bootStrapServers) sparkConf.set(s"spark.kafka.clusters.$identifier1.target.bootstrap.servers.regex", matchingTargetServersRegex) sparkConf.set(s"spark.kafka.clusters.$identifier2.auth.bootstrap.servers", bootStrapServers) sparkConf.set(s"spark.kafka.clusters.$identifier2.target.bootstrap.servers.regex", matchingTargetServersRegex) - addTokenToUGI(tokenService1) - addTokenToUGI(tokenService2) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + addTokenToUGI(tokenService2, tokenId1, tokenPassword1) val thrown = intercept[IllegalArgumentException] { - KafkaTokenUtil.findMatchingToken(sparkConf, bootStrapServers) + KafkaTokenUtil.findMatchingTokenClusterConfig(sparkConf, bootStrapServers) } assert(thrown.getMessage.contains("More than one delegation token matches")) } test("getTokenJaasParams with token should return scram module") { - addTokenToUGI(tokenService1) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) val clusterConf = createClusterConf(identifier1, SASL_SSL.name) val jaasParams = KafkaTokenUtil.getTokenJaasParams(clusterConf) assert(jaasParams.contains("ScramLoginModule required")) assert(jaasParams.contains("tokenauth=true")) - assert(jaasParams.contains(tokenId)) - 
assert(jaasParams.contains(tokenPassword)) + assert(jaasParams.contains(tokenId1)) + assert(jaasParams.contains(tokenPassword1)) + } + + test("isConnectorUsingCurrentToken without security should return true") { + val kafkaParams = Map[String, Object]().asJava + + assert(KafkaTokenUtil.isConnectorUsingCurrentToken(kafkaParams, None)) + } + + test("isConnectorUsingCurrentToken with same token should return true") { + setSparkEnv( + Map( + s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers + ) + ) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + val kafkaParams = getKafkaParams() + val clusterConfig = KafkaTokenUtil.findMatchingTokenClusterConfig(SparkEnv.get.conf, + kafkaParams.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG).asInstanceOf[String]) + + assert(KafkaTokenUtil.isConnectorUsingCurrentToken(kafkaParams, clusterConfig)) + } + + test("isConnectorUsingCurrentToken with different token should return false") { + setSparkEnv( + Map( + s"spark.kafka.clusters.$identifier1.auth.bootstrap.servers" -> bootStrapServers + ) + ) + addTokenToUGI(tokenService1, tokenId1, tokenPassword1) + val kafkaParams = getKafkaParams() + addTokenToUGI(tokenService1, tokenId2, tokenPassword2) + val clusterConfig = KafkaTokenUtil.findMatchingTokenClusterConfig(SparkEnv.get.conf, + kafkaParams.get(CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG).asInstanceOf[String]) + + assert(!KafkaTokenUtil.isConnectorUsingCurrentToken(kafkaParams, clusterConfig)) + } + + private def getKafkaParams(): ju.Map[String, Object] = { + val clusterConf = createClusterConf(identifier1, SASL_SSL.name) + Map[String, Object]( + CommonClientConfigs.BOOTSTRAP_SERVERS_CONFIG -> bootStrapServers, + SaslConfigs.SASL_JAAS_CONFIG -> KafkaTokenUtil.getTokenJaasParams(clusterConf) + ).asJava } } diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 397de87d3cdff..d11569d709b23 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -45,6 +45,13 @@ ${project.version} provided + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + test-jar + test + org.apache.spark spark-core_${scala.binary.version} diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala index 4d3e476e7cc58..925327d9d58e6 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/DirectKafkaStreamSuite.scala @@ -18,8 +18,8 @@ package org.apache.spark.streaming.kafka010 import java.io.File -import java.lang.{ Long => JLong } -import java.util.{ Arrays, HashMap => JHashMap, Map => JMap, UUID } +import java.lang.{Long => JLong} +import java.util.{Arrays, HashMap => JHashMap, Map => JMap, UUID} import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.ConcurrentLinkedQueue import java.util.concurrent.atomic.AtomicLong @@ -31,13 +31,12 @@ import scala.util.Random import org.apache.kafka.clients.consumer._ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.StringDeserializer -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import 
org.apache.spark.streaming.{Milliseconds, StreamingContext, Time} +import org.apache.spark.streaming.{LocalStreamingContext, Milliseconds, StreamingContext, Time} import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.scheduler._ import org.apache.spark.streaming.scheduler.rate.RateEstimator @@ -45,8 +44,7 @@ import org.apache.spark.util.Utils class DirectKafkaStreamSuite extends SparkFunSuite - with BeforeAndAfter - with BeforeAndAfterAll + with LocalStreamingContext with Eventually with Logging { val sparkConf = new SparkConf() @@ -56,18 +54,17 @@ class DirectKafkaStreamSuite // Otherwise the poll timeout defaults to 2 minutes and causes test cases to run longer. .set("spark.streaming.kafka.consumer.poll.ms", "10000") - private var ssc: StreamingContext = _ private var testDir: File = _ private var kafkaTestUtils: KafkaTestUtils = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } - override def afterAll() { + override def afterAll(): Unit = { try { if (kafkaTestUtils != null) { kafkaTestUtils.teardown() @@ -78,12 +75,13 @@ class DirectKafkaStreamSuite } } - after { - if (ssc != null) { - ssc.stop(stopSparkContext = true) - } - if (testDir != null) { - Utils.deleteRecursively(testDir) + override def afterEach(): Unit = { + try { + if (testDir != null) { + Utils.deleteRecursively(testDir) + } + } finally { + super.afterEach() } } @@ -342,7 +340,7 @@ class DirectKafkaStreamSuite val kafkaParams = getKafkaParams("auto.offset.reset" -> "earliest") // Send data to Kafka - def sendData(data: Seq[Int]) { + def sendData(data: Seq[Int]): Unit = { val strings = data.map { _.toString} kafkaTestUtils.sendMessages(topic, strings.map { _ -> 1}.toMap) } @@ -434,7 +432,7 @@ class DirectKafkaStreamSuite val committed = new ConcurrentHashMap[TopicPartition, OffsetAndMetadata]() // Send data to Kafka and wait for it to be received - def sendDataAndWaitForReceive(data: Seq[Int]) { + def sendDataAndWaitForReceive(data: Seq[Int]): Unit = { val strings = data.map { _.toString} kafkaTestUtils.sendMessages(topic, strings.map { _ -> 1}.toMap) eventually(timeout(10.seconds), interval(50.milliseconds)) { diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumerSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumerSuite.scala index 431473e7f1d38..82913cf416a5f 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumerSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumerSuite.scala @@ -27,7 +27,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.serialization.ByteArrayDeserializer import org.mockito.Mockito.when import org.scalatest.BeforeAndAfterAll -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark._ diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala index 47bc8fec2c80c..d6123e16dd238 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala @@ -47,14 +47,14 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { 
private var sc: SparkContext = _ - override def beforeAll { + override def beforeAll: Unit = { super.beforeAll() sc = new SparkContext(sparkConf) kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } - override def afterAll { + override def afterAll: Unit = { try { try { if (sc != null) { @@ -81,7 +81,8 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { private val preferredHosts = LocationStrategies.PreferConsistent - private def compactLogs(topic: String, partition: Int, messages: Array[(String, String)]) { + private def compactLogs(topic: String, partition: Int, + messages: Array[(String, String)]): Unit = { val mockTime = new MockTime() val logs = new Pool[TopicPartition, Log]() val logDir = kafkaTestUtils.brokerLogDir diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala index 5dec9709011e6..999870acfb532 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala @@ -316,7 +316,7 @@ private[kafka010] class KafkaTestUtils extends Logging { val actualPort = factory.getLocalPort - def shutdown() { + def shutdown(): Unit = { factory.shutdown() // The directories are not closed even if the ZooKeeper server is shut down. // Please see ZOOKEEPER-1844, which is fixed in 3.4.6+. It leads to test failures diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockTime.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockTime.scala index dedd691cd1b23..d38ed9fc9263d 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockTime.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/mocks/MockTime.scala @@ -45,7 +45,7 @@ private[kafka010] class MockTime(@volatile private var currentMs: Long) extends override def nanoseconds: Long = TimeUnit.NANOSECONDS.convert(currentMs, TimeUnit.MILLISECONDS) - override def sleep(ms: Long) { + override def sleep(ms: Long): Unit = { this.currentMs += ms scheduler.tick() } diff --git a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index 86c42df9e8435..31ca2fe5c95ff 100644 --- a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -32,13 +32,14 @@ import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; -import org.apache.spark.streaming.kinesis.KinesisUtils; +import org.apache.spark.streaming.kinesis.KinesisInitialPositions; +import org.apache.spark.streaming.kinesis.KinesisInputDStream; import scala.Tuple2; +import scala.reflect.ClassTag$; import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; import com.amazonaws.services.kinesis.AmazonKinesisClient; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; /** * Consumes messages from a Amazon Kinesis streams and does wordcount. 
@@ -135,11 +136,19 @@ public static void main(String[] args) throws Exception {
     // Create the Kinesis DStreams
     List<JavaDStream<byte[]>> streamsList = new ArrayList<>(numStreams);
     for (int i = 0; i < numStreams; i++) {
-      streamsList.add(
-          KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName,
-              InitialPositionInStream.LATEST, kinesisCheckpointInterval,
-              StorageLevel.MEMORY_AND_DISK_2())
-      );
+      streamsList.add(JavaDStream.fromDStream(
+          KinesisInputDStream.builder()
+              .streamingContext(jssc)
+              .checkpointAppName(kinesisAppName)
+              .streamName(streamName)
+              .endpointUrl(endpointUrl)
+              .regionName(regionName)
+              .initialPosition(new KinesisInitialPositions.Latest())
+              .checkpointInterval(kinesisCheckpointInterval)
+              .storageLevel(StorageLevel.MEMORY_AND_DISK_2())
+              .build(),
+          ClassTag$.MODULE$.apply(byte[].class)
+      ));
     }
 
     // Union all the streams if there is more than 1 stream
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala
index fcb790e3ea1f9..a5d5ac769b28d 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala
@@ -73,7 +73,7 @@ import org.apache.spark.streaming.kinesis.KinesisInputDStream
  * the Kinesis Spark Streaming integration.
  */
 object KinesisWordCountASL extends Logging {
-  def main(args: Array[String]) {
+  def main(args: Array[String]): Unit = {
     // Check that all required args were passed in.
     if (args.length != 3) {
       System.err.println(
@@ -178,7 +178,7 @@ object KinesisWordCountASL extends Logging {
 *   https://kinesis.us-east-1.amazonaws.com us-east-1 10 5
  */
 object KinesisWordProducerASL {
-  def main(args: Array[String]) {
+  def main(args: Array[String]): Unit = {
     if (args.length != 4) {
       System.err.println(
         """
@@ -269,7 +269,7 @@ object KinesisWordProducerASL {
  */
 private[streaming] object StreamingExamples extends Logging {
   // Set reasonable logging levels for streaming if the user has not configured log4j.
-  def setStreamingLogLevels() {
+  def setStreamingLogLevels(): Unit = {
     val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
     if (!log4jInitialized) {
       // We first log something to initialize Spark's default logging, then we override the
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
index 5fb83b26f8382..11e949536f2b6 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala
@@ -68,7 +68,7 @@ private[kinesis] class KinesisCheckpointer(
     if (checkpointer != null) {
       try {
         // We must call `checkpoint()` with no parameter to finish reading shards.
- // See an URL below for details: + // See a URL below for details: // https://forums.aws.amazon.com/thread.jspa?threadID=244218 KinesisRecordProcessor.retryRandom(checkpointer.checkpoint(), 4, 100) } catch { diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index 608da0b8bf563..8c3931a1c87fd 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -19,7 +19,9 @@ package org.apache.spark.streaming.kinesis import scala.reflect.ClassTag -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import collection.JavaConverters._ +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{InitialPositionInStream, KinesisClientLibConfiguration} +import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel import com.amazonaws.services.kinesis.model.Record import org.apache.spark.rdd.RDD @@ -43,7 +45,9 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( val messageHandler: Record => T, val kinesisCreds: SparkAWSCredentials, val dynamoDBCreds: Option[SparkAWSCredentials], - val cloudWatchCreds: Option[SparkAWSCredentials] + val cloudWatchCreds: Option[SparkAWSCredentials], + val metricsLevel: MetricsLevel, + val metricsEnabledDimensions: Set[String] ) extends ReceiverInputDStream[T](_ssc) { import KinesisReadConfigurations._ @@ -79,7 +83,8 @@ private[kinesis] class KinesisInputDStream[T: ClassTag]( override def getReceiver(): Receiver[T] = { new KinesisReceiver(streamName, endpointUrl, regionName, initialPosition, checkpointAppName, checkpointInterval, _storageLevel, messageHandler, - kinesisCreds, dynamoDBCreds, cloudWatchCreds) + kinesisCreds, dynamoDBCreds, cloudWatchCreds, + metricsLevel, metricsEnabledDimensions) } } @@ -104,6 +109,8 @@ object KinesisInputDStream { private var kinesisCredsProvider: Option[SparkAWSCredentials] = None private var dynamoDBCredsProvider: Option[SparkAWSCredentials] = None private var cloudWatchCredsProvider: Option[SparkAWSCredentials] = None + private var metricsLevel: Option[MetricsLevel] = None + private var metricsEnabledDimensions: Option[Set[String]] = None /** * Sets the StreamingContext that will be used to construct the Kinesis DStream. This is a @@ -237,6 +244,7 @@ object KinesisInputDStream { * endpoint. Defaults to [[DefaultCredentialsProvider]] if no custom value is specified. * * @param credentials [[SparkAWSCredentials]] to use for Kinesis authentication + * @return Reference to this [[KinesisInputDStream.Builder]] */ def kinesisCredentials(credentials: SparkAWSCredentials): Builder = { kinesisCredsProvider = Option(credentials) @@ -248,6 +256,7 @@ object KinesisInputDStream { * endpoint. Will use the same credentials used for AWS Kinesis if no custom value is set. * * @param credentials [[SparkAWSCredentials]] to use for DynamoDB authentication + * @return Reference to this [[KinesisInputDStream.Builder]] */ def dynamoDBCredentials(credentials: SparkAWSCredentials): Builder = { dynamoDBCredsProvider = Option(credentials) @@ -259,12 +268,43 @@ object KinesisInputDStream { * endpoint. Will use the same credentials used for AWS Kinesis if no custom value is set. 
* * @param credentials [[SparkAWSCredentials]] to use for CloudWatch authentication + * @return Reference to this [[KinesisInputDStream.Builder]] */ def cloudWatchCredentials(credentials: SparkAWSCredentials): Builder = { cloudWatchCredsProvider = Option(credentials) this } + /** + * Sets the CloudWatch metrics level. Defaults to + * [[KinesisClientLibConfiguration.DEFAULT_METRICS_LEVEL]] if no custom value is specified. + * + * @param metricsLevel [[MetricsLevel]] to specify the CloudWatch metrics level + * @return Reference to this [[KinesisInputDStream.Builder]] + * @see + * [[https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-kcl.html#metric-levels]] + */ + def metricsLevel(metricsLevel: MetricsLevel): Builder = { + this.metricsLevel = Option(metricsLevel) + this + } + + /** + * Sets the enabled CloudWatch metrics dimensions. Defaults to + * [[KinesisClientLibConfiguration.DEFAULT_METRICS_ENABLED_DIMENSIONS]] + * if no custom value is specified. + * + * @param metricsEnabledDimensions Set[String] to specify which CloudWatch metrics dimensions + * should be enabled + * @return Reference to this [[KinesisInputDStream.Builder]] + * @see + * [[https://docs.aws.amazon.com/streams/latest/dev/monitoring-with-kcl.html#metric-levels]] + */ + def metricsEnabledDimensions(metricsEnabledDimensions: Set[String]): Builder = { + this.metricsEnabledDimensions = Option(metricsEnabledDimensions) + this + } + /** * Create a new instance of [[KinesisInputDStream]] with configured parameters and the provided * message handler. @@ -287,7 +327,9 @@ object KinesisInputDStream { ssc.sc.clean(handler), kinesisCredsProvider.getOrElse(DefaultCredentials), dynamoDBCredsProvider, - cloudWatchCredsProvider) + cloudWatchCredsProvider, + metricsLevel.getOrElse(DEFAULT_METRICS_LEVEL), + metricsEnabledDimensions.getOrElse(DEFAULT_METRICS_ENABLED_DIMENSIONS)) } /** @@ -324,4 +366,8 @@ object KinesisInputDStream { private[kinesis] val DEFAULT_KINESIS_REGION_NAME: String = "us-east-1" private[kinesis] val DEFAULT_INITIAL_POSITION: KinesisInitialPosition = new Latest() private[kinesis] val DEFAULT_STORAGE_LEVEL: StorageLevel = StorageLevel.MEMORY_AND_DISK_2 + private[kinesis] val DEFAULT_METRICS_LEVEL: MetricsLevel = + KinesisClientLibConfiguration.DEFAULT_METRICS_LEVEL + private[kinesis] val DEFAULT_METRICS_ENABLED_DIMENSIONS: Set[String] = + KinesisClientLibConfiguration.DEFAULT_METRICS_ENABLED_DIMENSIONS.asScala.toSet } diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index 69c52365b1bf8..6feb8f1b5598f 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -25,6 +25,7 @@ import scala.util.control.NonFatal import com.amazonaws.services.kinesis.clientlibrary.interfaces.{IRecordProcessor, IRecordProcessorCheckpointer, IRecordProcessorFactory} import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{KinesisClientLibConfiguration, Worker} +import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel import com.amazonaws.services.kinesis.model.Record import org.apache.spark.internal.Logging @@ -92,7 +93,9 @@ private[kinesis] class KinesisReceiver[T]( messageHandler: Record => T, kinesisCreds: SparkAWSCredentials, dynamoDBCreds: Option[SparkAWSCredentials], - cloudWatchCreds: 
Option[SparkAWSCredentials]) + cloudWatchCreds: Option[SparkAWSCredentials], + metricsLevel: MetricsLevel, + metricsEnabledDimensions: Set[String]) extends Receiver[T](storageLevel) with Logging { receiver => /* @@ -143,7 +146,7 @@ private[kinesis] class KinesisReceiver[T]( * This is called when the KinesisReceiver starts and must be non-blocking. * The KCL creates and manages the receiving/processing thread pool through Worker.run(). */ - override def onStart() { + override def onStart(): Unit = { blockGenerator = supervisor.createBlockGenerator(new GeneratedBlockHandler) workerId = Utils.localHostName() + ":" + UUID.randomUUID() @@ -162,6 +165,8 @@ private[kinesis] class KinesisReceiver[T]( .withKinesisEndpoint(endpointUrl) .withTaskBackoffTimeMillis(500) .withRegionName(regionName) + .withMetricsLevel(metricsLevel) + .withMetricsEnabledDimensions(metricsEnabledDimensions.asJava) // Update the Kinesis client lib config with timestamp // if InitialPositionInStream.AT_TIMESTAMP is passed @@ -211,7 +216,7 @@ private[kinesis] class KinesisReceiver[T]( * The KCL worker.shutdown() method stops the receiving/processing threads. * The KCL will do its best to drain and checkpoint any in-flight records upon shutdown. */ - override def onStop() { + override def onStop(): Unit = { if (workerThread != null) { if (worker != null) { worker.shutdown() diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index 8c6a399dd763e..b35573e92e168 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -51,7 +51,7 @@ private[kinesis] class KinesisRecordProcessor[T](receiver: KinesisReceiver[T], w * * @param shardId assigned by the KCL to this particular RecordProcessor. */ - override def initialize(shardId: String) { + override def initialize(shardId: String): Unit = { this.shardId = shardId logInfo(s"Initialized workerId $workerId with shardId $shardId") } @@ -65,7 +65,8 @@ private[kinesis] class KinesisRecordProcessor[T](receiver: KinesisReceiver[T], w * @param checkpointer used to update Kinesis when this batch has been processed/stored * in the DStream */ - override def processRecords(batch: List[Record], checkpointer: IRecordProcessorCheckpointer) { + override def processRecords(batch: List[Record], + checkpointer: IRecordProcessorCheckpointer): Unit = { if (!receiver.isStopped()) { try { // Limit the number of processed records from Kinesis stream. This is because the KCL cannot diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala deleted file mode 100644 index c60b9896a3473..0000000000000 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtils.scala +++ /dev/null @@ -1,632 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
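Two new knobs now flow from the builder down to the KCL worker: `metricsLevel` and `metricsEnabledDimensions`, which default to `KinesisClientLibConfiguration.DEFAULT_METRICS_LEVEL` and `DEFAULT_METRICS_ENABLED_DIMENSIONS` and are forwarded by the receiver via `withMetricsLevel` / `withMetricsEnabledDimensions`. A hedged sketch of dialing CloudWatch reporting down from the caller side, reusing only constants that already appear in this patch (`configured` stands in for a builder that already has its stream, region and checkpoint settings):

```scala
import scala.collection.JavaConverters._

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration
import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel

import org.apache.spark.streaming.kinesis.KinesisInputDStream

// Sketch only: `configured` is assumed to be a fully configured builder.
def quietMetrics(configured: KinesisInputDStream.Builder): KinesisInputDStream.Builder =
  configured
    .metricsLevel(MetricsLevel.NONE)  // publish no KCL metrics at all
    .metricsEnabledDimensions(
      // keep only the dimensions the KCL always enables anyway
      KinesisClientLibConfiguration.METRICS_ALWAYS_ENABLED_DIMENSIONS.asScala.toSet)
```

Leaving both options unset keeps the previous behaviour, since the build step falls back to the KCL defaults captured in `DEFAULT_METRICS_LEVEL` and `DEFAULT_METRICS_ENABLED_DIMENSIONS`.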
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.streaming.kinesis - -import scala.reflect.ClassTag - -import com.amazonaws.regions.RegionUtils -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream -import com.amazonaws.services.kinesis.model.Record - -import org.apache.spark.api.java.function.{Function => JFunction} -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.{Duration, StreamingContext} -import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext} -import org.apache.spark.streaming.dstream.ReceiverInputDStream - -object KinesisUtils { - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param ssc StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * - * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. 
- */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T: ClassTag]( - ssc: StreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: Record => T): ReceiverInputDStream[T] = { - val cleanedHandler = ssc.sc.clean(messageHandler) - // Setting scope to override receiver stream's scope of "receiver stream" - ssc.withNamedScope("kinesis stream") { - new KinesisInputDStream[T](ssc, streamName, endpointUrl, validateRegion(regionName), - KinesisInitialPositions.fromKinesisInitialPosition(initialPositionInStream), - kinesisAppName, checkpointInterval, storageLevel, - cleanedHandler, DefaultCredentials, None, None) - } - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param ssc StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. 
- */ - // scalastyle:off - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T: ClassTag]( - ssc: StreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: Record => T, - awsAccessKeyId: String, - awsSecretKey: String): ReceiverInputDStream[T] = { - // scalastyle:on - val cleanedHandler = ssc.sc.clean(messageHandler) - ssc.withNamedScope("kinesis stream") { - val kinesisCredsProvider = BasicCredentials( - awsAccessKeyId = awsAccessKeyId, - awsSecretKey = awsSecretKey) - new KinesisInputDStream[T](ssc, streamName, endpointUrl, validateRegion(regionName), - KinesisInitialPositions.fromKinesisInitialPosition(initialPositionInStream), - kinesisAppName, checkpointInterval, storageLevel, - cleanedHandler, kinesisCredsProvider, None, None) - } - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param ssc StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from - * Kinesis stream. - * @param stsSessionName Name to uniquely identify STS sessions if multiple principals assume - * the same role. - * @param stsExternalId External ID that can be used to validate against the assumed IAM role's - * trust policy. - * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. 
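Callers of the removed key-pair overloads move the access and secret key onto the builder through `kinesisCredentials`. A short migration sketch reusing the `BasicCredentials` case class exactly as the updated KinesisStreamSuite does; the case class appears to be package-private, so the sketch sits in the kinesis package, and both keys are placeholders:

```scala
package org.apache.spark.streaming.kinesis  // BasicCredentials is package-private here

import org.apache.spark.streaming.dstream.ReceiverInputDStream

object BasicCredsMigration {  // illustrative object name
  // Formerly: KinesisUtils.createStream(..., awsAccessKeyId, awsSecretKey)
  def withKeyPair(configured: KinesisInputDStream.Builder): ReceiverInputDStream[Array[Byte]] =
    configured
      .kinesisCredentials(BasicCredentials("placeholderAccessKeyId", "placeholderSecretKey"))
      .build()
}
```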
- */ - // scalastyle:off - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T: ClassTag]( - ssc: StreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: Record => T, - awsAccessKeyId: String, - awsSecretKey: String, - stsAssumeRoleArn: String, - stsSessionName: String, - stsExternalId: String): ReceiverInputDStream[T] = { - // scalastyle:on - val cleanedHandler = ssc.sc.clean(messageHandler) - ssc.withNamedScope("kinesis stream") { - val kinesisCredsProvider = STSCredentials( - stsRoleArn = stsAssumeRoleArn, - stsSessionName = stsSessionName, - stsExternalId = Option(stsExternalId), - longLivedCreds = BasicCredentials( - awsAccessKeyId = awsAccessKeyId, - awsSecretKey = awsSecretKey)) - new KinesisInputDStream[T](ssc, streamName, endpointUrl, validateRegion(regionName), - KinesisInitialPositions.fromKinesisInitialPosition(initialPositionInStream), - kinesisAppName, checkpointInterval, storageLevel, - cleanedHandler, kinesisCredsProvider, None, None) - } - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param ssc StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * - * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. 
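For the STS overload, the long-lived key pair becomes the `longLivedCreds` of an `STSCredentials` value handed to the same `kinesisCredentials` call. A sketch using the named parameters from the removed implementation (role ARN, session name, external ID and keys are placeholders):

```scala
package org.apache.spark.streaming.kinesis  // STSCredentials is package-private here

object StsCredsMigration {  // illustrative object name
  def assumedRoleCreds(): SparkAWSCredentials =
    STSCredentials(
      stsRoleArn = "arn:aws:iam::123456789012:role/placeholder-role",
      stsSessionName = "placeholder-session",
      stsExternalId = Some("placeholder-external-id"),
      longLivedCreds = BasicCredentials("placeholderAccessKeyId", "placeholderSecretKey"))

  // Then, on a configured builder: configured.kinesisCredentials(assumedRoleCreds()).build()
}
```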
- */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream( - ssc: StreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel): ReceiverInputDStream[Array[Byte]] = { - // Setting scope to override receiver stream's scope of "receiver stream" - ssc.withNamedScope("kinesis stream") { - new KinesisInputDStream[Array[Byte]](ssc, streamName, endpointUrl, validateRegion(regionName), - KinesisInitialPositions.fromKinesisInitialPosition(initialPositionInStream), - kinesisAppName, checkpointInterval, storageLevel, - KinesisInputDStream.defaultMessageHandler, DefaultCredentials, None, None) - } - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param ssc StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream( - ssc: StreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - awsAccessKeyId: String, - awsSecretKey: String): ReceiverInputDStream[Array[Byte]] = { - ssc.withNamedScope("kinesis stream") { - val kinesisCredsProvider = BasicCredentials( - awsAccessKeyId = awsAccessKeyId, - awsSecretKey = awsSecretKey) - new KinesisInputDStream[Array[Byte]](ssc, streamName, endpointUrl, validateRegion(regionName), - KinesisInitialPositions.fromKinesisInitialPosition(initialPositionInStream), - kinesisAppName, checkpointInterval, storageLevel, - KinesisInputDStream.defaultMessageHandler, kinesisCredsProvider, None, None) - } - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. 
- * - * @param jssc Java StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * @param recordClass Class of the records in DStream - * - * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. - */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T]( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: JFunction[Record, T], - recordClass: Class[T]): JavaReceiverInputDStream[T] = { - implicit val recordCmt: ClassTag[T] = ClassTag(recordClass) - val cleanedHandler = jssc.sparkContext.clean(messageHandler.call(_)) - createStream[T](jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - initialPositionInStream, checkpointInterval, storageLevel, cleanedHandler) - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param jssc Java StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. 
- * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * @param recordClass Class of the records in DStream - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - */ - // scalastyle:off - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T]( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: JFunction[Record, T], - recordClass: Class[T], - awsAccessKeyId: String, - awsSecretKey: String): JavaReceiverInputDStream[T] = { - // scalastyle:on - implicit val recordCmt: ClassTag[T] = ClassTag(recordClass) - val cleanedHandler = jssc.sparkContext.clean(messageHandler.call(_)) - createStream[T](jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - initialPositionInStream, checkpointInterval, storageLevel, cleanedHandler, - awsAccessKeyId, awsSecretKey) - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param jssc Java StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param messageHandler A custom message handler that can generate a generic output from a - * Kinesis `Record`, which contains both message data, and metadata. - * @param recordClass Class of the records in DStream - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * @param stsAssumeRoleArn ARN of IAM role to assume when using STS sessions to read from - * Kinesis stream. - * @param stsSessionName Name to uniquely identify STS sessions if multiple princpals assume - * the same role. - * @param stsExternalId External ID that can be used to validate against the assumed IAM role's - * trust policy. 
- * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - */ - // scalastyle:off - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream[T]( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - messageHandler: JFunction[Record, T], - recordClass: Class[T], - awsAccessKeyId: String, - awsSecretKey: String, - stsAssumeRoleArn: String, - stsSessionName: String, - stsExternalId: String): JavaReceiverInputDStream[T] = { - // scalastyle:on - implicit val recordCmt: ClassTag[T] = ClassTag(recordClass) - val cleanedHandler = jssc.sparkContext.clean(messageHandler.call(_)) - createStream[T](jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - initialPositionInStream, checkpointInterval, storageLevel, cleanedHandler, - awsAccessKeyId, awsSecretKey, stsAssumeRoleArn, stsSessionName, stsExternalId) - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. - * - * @param jssc Java StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * - * @note The AWS credentials will be discovered using the DefaultAWSCredentialsProviderChain - * on the workers. See AWS documentation to understand how DefaultAWSCredentialsProviderChain - * gets the AWS credentials. - */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel - ): JavaReceiverInputDStream[Array[Byte]] = { - createStream[Array[Byte]](jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - initialPositionInStream, checkpointInterval, storageLevel, - KinesisInputDStream.defaultMessageHandler(_)) - } - - /** - * Create an input stream that pulls messages from a Kinesis stream. - * This uses the Kinesis Client Library (KCL) to pull messages from Kinesis. 
- * - * @param jssc Java StreamingContext object - * @param kinesisAppName Kinesis application name used by the Kinesis Client Library - * (KCL) to update DynamoDB - * @param streamName Kinesis stream name - * @param endpointUrl Url of Kinesis service (e.g., https://kinesis.us-east-1.amazonaws.com) - * @param regionName Name of region used by the Kinesis Client Library (KCL) to update - * DynamoDB (lease coordination and checkpointing) and CloudWatch (metrics) - * @param initialPositionInStream In the absence of Kinesis checkpoint info, this is the - * worker's initial starting position in the stream. - * The values are either the beginning of the stream - * per Kinesis' limit of 24 hours - * (InitialPositionInStream.TRIM_HORIZON) or - * the tip of the stream (InitialPositionInStream.LATEST). - * @param checkpointInterval Checkpoint interval for Kinesis checkpointing. - * See the Kinesis Spark Streaming documentation for more - * details on the different types of checkpoints. - * @param storageLevel Storage level to use for storing the received objects. - * StorageLevel.MEMORY_AND_DISK_2 is recommended. - * @param awsAccessKeyId AWS AccessKeyId (if null, will use DefaultAWSCredentialsProviderChain) - * @param awsSecretKey AWS SecretKey (if null, will use DefaultAWSCredentialsProviderChain) - * - * @note The given AWS credentials will get saved in DStream checkpoints if checkpointing - * is enabled. Make sure that your checkpoint directory is secure. - */ - @deprecated("Use KinesisInputDStream.builder instead", "2.2.0") - def createStream( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: InitialPositionInStream, - checkpointInterval: Duration, - storageLevel: StorageLevel, - awsAccessKeyId: String, - awsSecretKey: String): JavaReceiverInputDStream[Array[Byte]] = { - createStream[Array[Byte]](jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - initialPositionInStream, checkpointInterval, storageLevel, - KinesisInputDStream.defaultMessageHandler(_), awsAccessKeyId, awsSecretKey) - } - - private def validateRegion(regionName: String): String = { - Option(RegionUtils.getRegion(regionName)).map { _.getName }.getOrElse { - throw new IllegalArgumentException(s"Region name '$regionName' is not valid") - } - } -} - -/** - * This is a helper class that wraps the methods in KinesisUtils into more Python-friendly class and - * function so that it can be easily instantiated and called from Python's KinesisUtils. - */ -private class KinesisUtilsPythonHelper { - - def getInitialPositionInStream(initialPositionInStream: Int): InitialPositionInStream = { - initialPositionInStream match { - case 0 => InitialPositionInStream.LATEST - case 1 => InitialPositionInStream.TRIM_HORIZON - case _ => throw new IllegalArgumentException( - "Illegal InitialPositionInStream. 
Please use " + - "InitialPositionInStream.LATEST or InitialPositionInStream.TRIM_HORIZON") - } - } - - // scalastyle:off - def createStream( - jssc: JavaStreamingContext, - kinesisAppName: String, - streamName: String, - endpointUrl: String, - regionName: String, - initialPositionInStream: Int, - checkpointInterval: Duration, - storageLevel: StorageLevel, - awsAccessKeyId: String, - awsSecretKey: String, - stsAssumeRoleArn: String, - stsSessionName: String, - stsExternalId: String): JavaReceiverInputDStream[Array[Byte]] = { - // scalastyle:on - if (!(stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) - && !(stsAssumeRoleArn == null && stsSessionName == null && stsExternalId == null)) { - throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExtenalId " + - "must all be defined or all be null") - } - - if (stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) { - validateAwsCreds(awsAccessKeyId, awsSecretKey) - KinesisUtils.createStream(jssc.ssc, kinesisAppName, streamName, endpointUrl, regionName, - getInitialPositionInStream(initialPositionInStream), checkpointInterval, storageLevel, - KinesisInputDStream.defaultMessageHandler(_), awsAccessKeyId, awsSecretKey, - stsAssumeRoleArn, stsSessionName, stsExternalId) - } else { - validateAwsCreds(awsAccessKeyId, awsSecretKey) - if (awsAccessKeyId == null && awsSecretKey == null) { - KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName, - getInitialPositionInStream(initialPositionInStream), checkpointInterval, storageLevel) - } else { - KinesisUtils.createStream(jssc, kinesisAppName, streamName, endpointUrl, regionName, - getInitialPositionInStream(initialPositionInStream), checkpointInterval, storageLevel, - awsAccessKeyId, awsSecretKey) - } - } - } - - // Throw IllegalArgumentException unless both values are null or neither are. - private def validateAwsCreds(awsAccessKeyId: String, awsSecretKey: String) { - if (awsAccessKeyId == null && awsSecretKey != null) { - throw new IllegalArgumentException("awsSecretKey is set but awsAccessKeyId is null") - } - if (awsAccessKeyId != null && awsSecretKey == null) { - throw new IllegalArgumentException("awsAccessKeyId is set but awsSecretKey is null") - } - } -} diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala new file mode 100644 index 0000000000000..c89dedd3366d1 --- /dev/null +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisUtilsPythonHelper.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.streaming.kinesis + +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream + +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Duration +import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaStreamingContext} + +/** + * This is a helper class that wraps the methods in KinesisUtils into more Python-friendly class and + * function so that it can be easily instantiated and called from Python's KinesisUtils. + */ +private class KinesisUtilsPythonHelper { + + // scalastyle:off + def createStream( + jssc: JavaStreamingContext, + kinesisAppName: String, + streamName: String, + endpointUrl: String, + regionName: String, + initialPositionInStream: Int, + checkpointInterval: Duration, + storageLevel: StorageLevel, + awsAccessKeyId: String, + awsSecretKey: String, + stsAssumeRoleArn: String, + stsSessionName: String, + stsExternalId: String): JavaReceiverInputDStream[Array[Byte]] = { + // scalastyle:on + if (!(stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) + && !(stsAssumeRoleArn == null && stsSessionName == null && stsExternalId == null)) { + throw new IllegalArgumentException("stsAssumeRoleArn, stsSessionName, and stsExtenalId " + + "must all be defined or all be null") + } + if (awsAccessKeyId == null && awsSecretKey != null) { + throw new IllegalArgumentException("awsSecretKey is set but awsAccessKeyId is null") + } + if (awsAccessKeyId != null && awsSecretKey == null) { + throw new IllegalArgumentException("awsAccessKeyId is set but awsSecretKey is null") + } + + val kinesisInitialPosition = initialPositionInStream match { + case 0 => InitialPositionInStream.LATEST + case 1 => InitialPositionInStream.TRIM_HORIZON + case _ => throw new IllegalArgumentException( + "Illegal InitialPositionInStream. Please use " + + "InitialPositionInStream.LATEST or InitialPositionInStream.TRIM_HORIZON") + } + + val builder = KinesisInputDStream.builder. + streamingContext(jssc). + checkpointAppName(kinesisAppName). + streamName(streamName). + endpointUrl(endpointUrl). + regionName(regionName). + initialPosition(KinesisInitialPositions.fromKinesisInitialPosition(kinesisInitialPosition)). + checkpointInterval(checkpointInterval). + storageLevel(storageLevel) + + if (stsAssumeRoleArn != null && stsSessionName != null && stsExternalId != null) { + val kinesisCredsProvider = STSCredentials( + stsAssumeRoleArn, stsSessionName, Option(stsExternalId), + BasicCredentials(awsAccessKeyId, awsSecretKey)) + builder. + kinesisCredentials(kinesisCredsProvider). + buildWithMessageHandler(KinesisInputDStream.defaultMessageHandler) + } else { + if (awsAccessKeyId == null && awsSecretKey == null) { + builder.build() + } else { + builder.kinesisCredentials(BasicCredentials(awsAccessKeyId, awsSecretKey)).build() + } + } + } + +} diff --git a/external/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java b/external/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java deleted file mode 100644 index b37b087467926..0000000000000 --- a/external/kinesis-asl/src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisStreamSuite.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
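In the relocated Python helper the STS branch goes through `buildWithMessageHandler(KinesisInputDStream.defaultMessageHandler)` while the other branches call `build()`; `buildWithMessageHandler` is also what replaces the removed `createStream` overloads that accepted a custom `Record` handler. A hedged sketch of such a handler, mirroring the partition-key/sequence-number handler from the deleted Java test (the return type of `buildWithMessageHandler` is assumed here to be assignable to `ReceiverInputDStream`):

```scala
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.kinesis.KinesisInputDStream

// Sketch only: `configured` is assumed to be a fully configured builder.
def keyedStream(configured: KinesisInputDStream.Builder): ReceiverInputDStream[String] =
  configured.buildWithMessageHandler { record: Record =>
    // Same shape as the handler in the removed JavaKinesisStreamSuite
    record.getPartitionKey + "-" + record.getSequenceNumber
  }
```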
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.streaming.kinesis; - -import com.amazonaws.services.kinesis.model.Record; -import org.junit.Test; - -import org.apache.spark.api.java.function.Function; -import org.apache.spark.storage.StorageLevel; -import org.apache.spark.streaming.Duration; -import org.apache.spark.streaming.LocalJavaStreamingContext; -import org.apache.spark.streaming.api.java.JavaDStream; - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; - -/** - * Demonstrate the use of the KinesisUtils Java API - */ -public class JavaKinesisStreamSuite extends LocalJavaStreamingContext { - @Test - public void testKinesisStream() { - String dummyEndpointUrl = KinesisTestUtils.defaultEndpointUrl(); - String dummyRegionName = KinesisTestUtils.getRegionNameByEndpoint(dummyEndpointUrl); - - // Tests the API, does not actually test data receiving - JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "myAppName", "mySparkStream", - dummyEndpointUrl, dummyRegionName, InitialPositionInStream.LATEST, new Duration(2000), - StorageLevel.MEMORY_AND_DISK_2()); - ssc.stop(); - } - - @Test - public void testAwsCreds() { - String dummyEndpointUrl = KinesisTestUtils.defaultEndpointUrl(); - String dummyRegionName = KinesisTestUtils.getRegionNameByEndpoint(dummyEndpointUrl); - - // Tests the API, does not actually test data receiving - JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "myAppName", "mySparkStream", - dummyEndpointUrl, dummyRegionName, InitialPositionInStream.LATEST, new Duration(2000), - StorageLevel.MEMORY_AND_DISK_2(), "fakeAccessKey", "fakeSecretKey"); - ssc.stop(); - } - - private static Function handler = new Function() { - @Override - public String call(Record record) { - return record.getPartitionKey() + "-" + record.getSequenceNumber(); - } - }; - - @Test - public void testCustomHandler() { - // Tests the API, does not actually test data receiving - JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "testApp", "mySparkStream", - "https://kinesis.us-west-2.amazonaws.com", "us-west-2", InitialPositionInStream.LATEST, - new Duration(2000), StorageLevel.MEMORY_AND_DISK_2(), handler, String.class); - - ssc.stop(); - } - - @Test - public void testCustomHandlerAwsCreds() { - // Tests the API, does not actually test data receiving - JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "testApp", "mySparkStream", - "https://kinesis.us-west-2.amazonaws.com", "us-west-2", InitialPositionInStream.LATEST, - new Duration(2000), StorageLevel.MEMORY_AND_DISK_2(), handler, String.class, - "fakeAccessKey", "fakeSecretKey"); - - ssc.stop(); - } - - @Test - public void testCustomHandlerAwsStsCreds() { - // Tests the API, does not actually test data receiving - JavaDStream kinesisStream = KinesisUtils.createStream(ssc, "testApp", "mySparkStream", - "https://kinesis.us-west-2.amazonaws.com", "us-west-2", InitialPositionInStream.LATEST, - new Duration(2000), 
StorageLevel.MEMORY_AND_DISK_2(), handler, String.class, - "fakeAccessKey", "fakeSecretKey", "fakeSTSRoleArn", "fakeSTSSessionName", - "fakeSTSExternalId"); - - ssc.stop(); - } -} diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala index ac0e6a8429d06..3e88e956ec237 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointerSuite.scala @@ -28,7 +28,7 @@ import org.mockito.Mockito._ import org.mockito.invocation.InvocationOnMock import org.scalatest.{BeforeAndAfterEach, PrivateMethodTester} import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.streaming.{Duration, TestSuiteBase} import org.apache.spark.util.ManualClock diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala index 1c81298a7c201..8dc4de1aa3609 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisFunSuite.scala @@ -27,7 +27,7 @@ trait KinesisFunSuite extends SparkFunSuite { import KinesisTestUtils._ /** Run the test if environment variable is set or ignore the test */ - def testIfEnabled(testName: String)(testBody: => Unit) { + def testIfEnabled(testName: String)(testBody: => Unit): Unit = { if (shouldRunTests) { test(testName)(testBody) } else { diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisInputDStreamBuilderSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisInputDStreamBuilderSuite.scala index 361520e292266..8b0d73c96da73 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisInputDStreamBuilderSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisInputDStreamBuilderSuite.scala @@ -19,9 +19,11 @@ package org.apache.spark.streaming.kinesis import java.util.Calendar -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream +import collection.JavaConverters._ +import com.amazonaws.services.kinesis.clientlibrary.lib.worker.{InitialPositionInStream, KinesisClientLibConfiguration} +import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel import org.scalatest.BeforeAndAfterEach -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Duration, Seconds, StreamingContext, TestSuiteBase} @@ -82,6 +84,8 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE assert(dstream.kinesisCreds == DefaultCredentials) assert(dstream.dynamoDBCreds == None) assert(dstream.cloudWatchCreds == None) + assert(dstream.metricsLevel == DEFAULT_METRICS_LEVEL) + assert(dstream.metricsEnabledDimensions == DEFAULT_METRICS_ENABLED_DIMENSIONS) } test("should propagate custom non-auth values to KinesisInputDStream") { @@ -94,6 +98,9 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE val 
customKinesisCreds = mock[SparkAWSCredentials] val customDynamoDBCreds = mock[SparkAWSCredentials] val customCloudWatchCreds = mock[SparkAWSCredentials] + val customMetricsLevel = MetricsLevel.NONE + val customMetricsEnabledDimensions = + KinesisClientLibConfiguration.METRICS_ALWAYS_ENABLED_DIMENSIONS.asScala.toSet val dstream = builder .endpointUrl(customEndpointUrl) @@ -105,6 +112,8 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE .kinesisCredentials(customKinesisCreds) .dynamoDBCredentials(customDynamoDBCreds) .cloudWatchCredentials(customCloudWatchCreds) + .metricsLevel(customMetricsLevel) + .metricsEnabledDimensions(customMetricsEnabledDimensions) .build() assert(dstream.endpointUrl == customEndpointUrl) assert(dstream.regionName == customRegion) @@ -115,6 +124,8 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE assert(dstream.kinesisCreds == customKinesisCreds) assert(dstream.dynamoDBCreds == Option(customDynamoDBCreds)) assert(dstream.cloudWatchCreds == Option(customCloudWatchCreds)) + assert(dstream.metricsLevel == customMetricsLevel) + assert(dstream.metricsEnabledDimensions == customMetricsEnabledDimensions) // Testing with AtTimestamp val cal = Calendar.getInstance() @@ -132,6 +143,8 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE .kinesisCredentials(customKinesisCreds) .dynamoDBCredentials(customDynamoDBCreds) .cloudWatchCredentials(customCloudWatchCreds) + .metricsLevel(customMetricsLevel) + .metricsEnabledDimensions(customMetricsEnabledDimensions) .build() assert(dstreamAtTimestamp.endpointUrl == customEndpointUrl) assert(dstreamAtTimestamp.regionName == customRegion) @@ -145,6 +158,8 @@ class KinesisInputDStreamBuilderSuite extends TestSuiteBase with BeforeAndAfterE assert(dstreamAtTimestamp.kinesisCreds == customKinesisCreds) assert(dstreamAtTimestamp.dynamoDBCreds == Option(customDynamoDBCreds)) assert(dstreamAtTimestamp.cloudWatchCreds == Option(customCloudWatchCreds)) + assert(dstreamAtTimestamp.metricsLevel == customMetricsLevel) + assert(dstreamAtTimestamp.metricsEnabledDimensions == customMetricsEnabledDimensions) } test("old Api should throw UnsupportedOperationExceptionexception with AT_TIMESTAMP") { diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 52690847418ef..470a8cecc8fd9 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -27,7 +27,7 @@ import com.amazonaws.services.kinesis.model.Record import org.mockito.ArgumentMatchers.{anyList, anyString, eq => meq} import org.mockito.Mockito.{never, times, verify, when} import org.scalatest.{BeforeAndAfter, Matchers} -import org.scalatest.mockito.MockitoSugar +import org.scalatestplus.mockito.MockitoSugar import org.apache.spark.streaming.{Duration, TestSuiteBase} diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisStreamSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisStreamSuite.scala index 51ee7fd213de5..eee62d25e62bb 100644 --- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisStreamSuite.scala +++ 
b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisStreamSuite.scala @@ -21,7 +21,6 @@ import scala.collection.mutable import scala.concurrent.duration._ import scala.util.Random -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.Record import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} import org.scalatest.Matchers._ @@ -31,7 +30,7 @@ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.network.util.JavaUtils import org.apache.spark.rdd.RDD import org.apache.spark.storage.{StorageLevel, StreamBlockId} -import org.apache.spark.streaming._ +import org.apache.spark.streaming.{LocalStreamingContext, _} import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.kinesis.KinesisInitialPositions.Latest import org.apache.spark.streaming.kinesis.KinesisReadConfigurations._ @@ -41,7 +40,7 @@ import org.apache.spark.streaming.scheduler.ReceivedBlockInfo import org.apache.spark.util.Utils abstract class KinesisStreamTests(aggregateTestData: Boolean) extends KinesisFunSuite - with Eventually with BeforeAndAfter with BeforeAndAfterAll { + with LocalStreamingContext with Eventually with BeforeAndAfter with BeforeAndAfterAll { // This is the name that KCL will use to save metadata to DynamoDB private val appName = s"KinesisStreamSuite-${math.abs(Random.nextLong())}" @@ -54,15 +53,9 @@ abstract class KinesisStreamTests(aggregateTestData: Boolean) extends KinesisFun private val dummyAWSSecretKey = "dummySecretKey" private var testUtils: KinesisTestUtils = null - private var ssc: StreamingContext = null private var sc: SparkContext = null override def beforeAll(): Unit = { - val conf = new SparkConf() - .setMaster("local[4]") - .setAppName("KinesisStreamSuite") // Setting Spark app name to Kinesis app name - sc = new SparkContext(conf) - runIfTestsEnabled("Prepare KinesisTestUtils") { testUtils = new KPLBasedKinesisTestUtils() testUtils.createStream() @@ -71,12 +64,6 @@ abstract class KinesisStreamTests(aggregateTestData: Boolean) extends KinesisFun override def afterAll(): Unit = { try { - if (ssc != null) { - ssc.stop() - } - if (sc != null) { - sc.stop() - } if (testUtils != null) { // Delete the Kinesis stream as well as the DynamoDB table generated by // Kinesis Client Library when consuming the stream @@ -88,34 +75,36 @@ abstract class KinesisStreamTests(aggregateTestData: Boolean) extends KinesisFun } } - before { + override def beforeEach(): Unit = { + super.beforeEach() + val conf = new SparkConf() + .setMaster("local[4]") + .setAppName("KinesisStreamSuite") // Setting Spark app name to Kinesis app name + sc = new SparkContext(conf) ssc = new StreamingContext(sc, batchDuration) } - after { - if (ssc != null) { - ssc.stop(stopSparkContext = false) - ssc = null - } - if (testUtils != null) { - testUtils.deleteDynamoDBTable(appName) + override def afterEach(): Unit = { + try { + if (testUtils != null) { + testUtils.deleteDynamoDBTable(appName) + } + } finally { + super.afterEach() } } - test("KinesisUtils API") { - val kinesisStream1 = KinesisUtils.createStream(ssc, "myAppName", "mySparkStream", - dummyEndpointUrl, dummyRegionName, - InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2) - val kinesisStream2 = KinesisUtils.createStream(ssc, "myAppName", "mySparkStream", - dummyEndpointUrl, dummyRegionName, - InitialPositionInStream.LATEST, Seconds(2), StorageLevel.MEMORY_AND_DISK_2, - dummyAWSAccessKey, 
dummyAWSSecretKey) - } - test("RDD generation") { - val inputStream = KinesisUtils.createStream(ssc, appName, "dummyStream", - dummyEndpointUrl, dummyRegionName, InitialPositionInStream.LATEST, Seconds(2), - StorageLevel.MEMORY_AND_DISK_2, dummyAWSAccessKey, dummyAWSSecretKey) + val inputStream = KinesisInputDStream.builder. + streamingContext(ssc). + checkpointAppName(appName). + streamName("dummyStream"). + endpointUrl(dummyEndpointUrl). + regionName(dummyRegionName).initialPosition(new Latest()). + checkpointInterval(Seconds(2)). + storageLevel(StorageLevel.MEMORY_AND_DISK_2). + kinesisCredentials(BasicCredentials(dummyAWSAccessKey, dummyAWSSecretKey)). + build() assert(inputStream.isInstanceOf[KinesisInputDStream[Array[Byte]]]) val kinesisStream = inputStream.asInstanceOf[KinesisInputDStream[Array[Byte]]] diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala index ecc37dcaad1fe..d733868908350 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala @@ -81,13 +81,13 @@ object Edge { override def copyElement( src: Array[Edge[ED]], srcPos: Int, - dst: Array[Edge[ED]], dstPos: Int) { + dst: Array[Edge[ED]], dstPos: Int): Unit = { dst(dstPos) = src(srcPos) } override def copyRange( src: Array[Edge[ED]], srcPos: Int, - dst: Array[Edge[ED]], dstPos: Int, length: Int) { + dst: Array[Edge[ED]], dstPos: Int, length: Int): Unit = { System.arraycopy(src, srcPos, dst, dstPos, length) } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala index ef0b943fc3c38..4ff5b02daecbe 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala @@ -30,7 +30,7 @@ object GraphXUtils { /** * Registers classes that GraphX uses with Kryo. */ - def registerKryoClasses(conf: SparkConf) { + def registerKryoClasses(conf: SparkConf): Unit = { conf.registerKryoClasses(Array( classOf[Edge[Object]], classOf[(VertexId, Object)], @@ -54,7 +54,7 @@ object GraphXUtils { mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)], reduceFunc: (A, A) => A, activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None): VertexRDD[A] = { - def sendMsg(ctx: EdgeContext[VD, ED, A]) { + def sendMsg(ctx: EdgeContext[VD, ED, A]): Unit = { mapFunc(ctx.toEdgeTriplet).foreach { kv => val id = kv._1 val msg = kv._2 diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala index 0e6a340a680ba..8d03112a1c3dc 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala @@ -222,7 +222,7 @@ class EdgePartition[ * * @param f an external state mutating user defined function. 
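
For reference, a minimal sketch of how a consumer of the KinesisInputDStream builder exercised above might set the new metricsLevel and metricsEnabledDimensions options. Stream, region and endpoint values are placeholders, and the import paths are assumed from the KCL 1.x client library and the spark-streaming-kinesis-asl module; only the builder methods themselves are taken from the hunks above.

import scala.collection.JavaConverters._
import com.amazonaws.services.kinesis.clientlibrary.lib.worker.KinesisClientLibConfiguration
import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisInitialPositions.Latest
import org.apache.spark.streaming.kinesis.KinesisInputDStream

def buildStream(ssc: StreamingContext) = KinesisInputDStream.builder
  .streamingContext(ssc)
  .checkpointAppName("myKinesisApp")           // placeholder application name
  .streamName("myKinesisStream")               // placeholder stream name
  .endpointUrl("https://kinesis.us-west-2.amazonaws.com")
  .regionName("us-west-2")
  .initialPosition(new Latest())
  .checkpointInterval(Seconds(2))
  .storageLevel(StorageLevel.MEMORY_AND_DISK_2)
  .metricsLevel(MetricsLevel.NONE)             // new option: turn CloudWatch metrics off
  .metricsEnabledDimensions(
    KinesisClientLibConfiguration.METRICS_ALWAYS_ENABLED_DIMENSIONS.asScala.toSet)
  .build()
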
*/ - def foreach(f: Edge[ED] => Unit) { + def foreach(f: Edge[ED] => Unit): Unit = { iterator.foreach(f) } @@ -495,7 +495,7 @@ private class AggregatingEdgeContext[VD, ED, A]( srcId: VertexId, dstId: VertexId, localSrcId: Int, localDstId: Int, srcAttr: VD, dstAttr: VD, - attr: ED) { + attr: ED): Unit = { _srcId = srcId _dstId = dstId _localSrcId = localSrcId @@ -505,13 +505,13 @@ private class AggregatingEdgeContext[VD, ED, A]( _attr = attr } - def setSrcOnly(srcId: VertexId, localSrcId: Int, srcAttr: VD) { + def setSrcOnly(srcId: VertexId, localSrcId: Int, srcAttr: VD): Unit = { _srcId = srcId _localSrcId = localSrcId _srcAttr = srcAttr } - def setRest(dstId: VertexId, localDstId: Int, dstAttr: VD, attr: ED) { + def setRest(dstId: VertexId, localDstId: Int, dstAttr: VD, attr: ED): Unit = { _dstId = dstId _localDstId = localDstId _dstAttr = dstAttr @@ -524,14 +524,14 @@ private class AggregatingEdgeContext[VD, ED, A]( override def dstAttr: VD = _dstAttr override def attr: ED = _attr - override def sendToSrc(msg: A) { + override def sendToSrc(msg: A): Unit = { send(_localSrcId, msg) } - override def sendToDst(msg: A) { + override def sendToDst(msg: A): Unit = { send(_localDstId, msg) } - @inline private def send(localId: Int, msg: A) { + @inline private def send(localId: Int, msg: A): Unit = { if (bitset.get(localId)) { aggregates(localId) = mergeMsg(aggregates(localId), msg) } else { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala index 27c08c894a39f..c7868f85d1f76 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala @@ -30,7 +30,7 @@ class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag, VD: Cla private[this] val edges = new PrimitiveVector[Edge[ED]](size) /** Add a new edge to the partition. */ - def add(src: VertexId, dst: VertexId, d: ED) { + def add(src: VertexId, dst: VertexId, d: ED): Unit = { edges += Edge(src, dst, d) } @@ -90,7 +90,7 @@ class ExistingEdgePartitionBuilder[ private[this] val edges = new PrimitiveVector[EdgeWithLocalIds[ED]](size) /** Add a new edge to the partition. 
*/ - def add(src: VertexId, dst: VertexId, localSrc: Int, localDst: Int, d: ED) { + def add(src: VertexId, dst: VertexId, localSrc: Int, localDst: Int, d: ED): Unit = { edges += EdgeWithLocalIds(src, dst, localSrc, localDst, d) } @@ -153,13 +153,13 @@ private[impl] object EdgeWithLocalIds { override def copyElement( src: Array[EdgeWithLocalIds[ED]], srcPos: Int, - dst: Array[EdgeWithLocalIds[ED]], dstPos: Int) { + dst: Array[EdgeWithLocalIds[ED]], dstPos: Int): Unit = { dst(dstPos) = src(srcPos) } override def copyRange( src: Array[EdgeWithLocalIds[ED]], srcPos: Int, - dst: Array[EdgeWithLocalIds[ED]], dstPos: Int, length: Int) { + dst: Array[EdgeWithLocalIds[ED]], dstPos: Int, length: Int): Unit = { System.arraycopy(src, srcPos, dst, dstPos, length) } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 0a97ab492600d..8564597f4f135 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -103,15 +103,16 @@ class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( (part, (e.srcId, e.dstId, e.attr)) } .partitionBy(new HashPartitioner(numPartitions)) - .mapPartitionsWithIndex( { (pid, iter) => - val builder = new EdgePartitionBuilder[ED, VD]()(edTag, vdTag) - iter.foreach { message => - val data = message._2 - builder.add(data._1, data._2, data._3) - } - val edgePartition = builder.toEdgePartition - Iterator((pid, edgePartition)) - }, preservesPartitioning = true)).cache() + .mapPartitionsWithIndex( + { (pid: Int, iter: Iterator[(PartitionID, (VertexId, VertexId, ED))]) => + val builder = new EdgePartitionBuilder[ED, VD]()(edTag, vdTag) + iter.foreach { message => + val data = message._2 + builder.add(data._1, data._2, data._3) + } + val edgePartition = builder.toEdgePartition + Iterator((pid, edgePartition)) + }, preservesPartitioning = true)).cache() GraphImpl.fromExistingRDDs(vertices.withEdges(newEdges), newEdges) } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala index d2194d85bf525..e0d4dd3248734 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala @@ -58,7 +58,7 @@ class ReplicatedVertexView[VD: ClassTag, ED: ClassTag]( * `vertices`. This operation modifies the `ReplicatedVertexView`, and callers can access `edges` * afterwards to obtain the upgraded view. 
*/ - def upgrade(vertices: VertexRDD[VD], includeSrc: Boolean, includeDst: Boolean) { + def upgrade(vertices: VertexRDD[VD], includeSrc: Boolean, includeDst: Boolean): Unit = { val shipSrc = includeSrc && !hasSrcId val shipDst = includeDst && !hasDstId if (shipSrc || shipDst) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala index 6453bbeae9f10..bef380dc12c23 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala @@ -123,7 +123,7 @@ class RoutingTablePartition( */ def foreachWithinEdgePartition (pid: PartitionID, includeSrc: Boolean, includeDst: Boolean) - (f: VertexId => Unit) { + (f: VertexId => Unit): Unit = { val (vidsCandidate, srcVids, dstVids) = routingTable(pid) val size = vidsCandidate.length if (includeSrc && includeDst) { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala index 2847a4e172d40..c508056fe3ae3 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala @@ -98,7 +98,7 @@ object SVDPlusPlus { (ctx: EdgeContext[ (Array[Double], Array[Double], Double, Double), Double, - (Array[Double], Array[Double], Double)]) { + (Array[Double], Array[Double], Double)]): Unit = { val (usr, itm) = (ctx.srcAttr, ctx.dstAttr) val (p, q) = (usr._1, itm._1) val rank = p.length @@ -177,7 +177,7 @@ object SVDPlusPlus { // calculate error on training set def sendMsgTestF(conf: Conf, u: Double) - (ctx: EdgeContext[(Array[Double], Array[Double], Double, Double), Double, Double]) { + (ctx: EdgeContext[(Array[Double], Array[Double], Double, Double), Double, Double]): Unit = { val (usr, itm) = (ctx.srcAttr, ctx.dstAttr) val (p, q) = (usr._1, itm._1) var pred = u + usr._3 + itm._3 + blas.ddot(q.length, q, 1, usr._2, 1) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala index 2715137d19ebc..211b4d6e4c5d3 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/TriangleCount.scala @@ -85,7 +85,7 @@ object TriangleCount { } // Edge function computes intersection of smaller vertex with larger vertex - def edgeFunc(ctx: EdgeContext[VertexSet, ED, Int]) { + def edgeFunc(ctx: EdgeContext[VertexSet, ED, Int]): Unit = { val (smallSet, largeSet) = if (ctx.srcAttr.size < ctx.dstAttr.size) { (ctx.srcAttr, ctx.dstAttr) } else { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala index 5ece5ae5c359b..dc3cdc452a389 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala @@ -118,7 +118,7 @@ private[graphx] object BytecodeUtils { if (name == methodName) { new MethodVisitor(ASM7) { override def visitMethodInsn( - op: Int, owner: String, name: String, desc: String, itf: Boolean) { + op: Int, owner: String, name: String, desc: String, itf: Boolean): Unit = { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { 
methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name)) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala index 972237da1cb28..e3b283649cb2c 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala @@ -71,7 +71,7 @@ class GraphXPrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag, } /** Set the value for a key */ - def update(k: K, v: V) { + def update(k: K, v: V): Unit = { val pos = keySet.addWithoutResize(k) & OpenHashSet.POSITION_MASK _values(pos) = v keySet.rehashIfNeeded(k, grow, move) @@ -80,7 +80,7 @@ class GraphXPrimitiveKeyOpenHashMap[@specialized(Long, Int) K: ClassTag, /** Set the value for a key */ - def setMerge(k: K, v: V, mergeF: (V, V) => V) { + def setMerge(k: K, v: V, mergeF: (V, V) => V): Unit = { val pos = keySet.addWithoutResize(k) val ind = pos & OpenHashSet.POSITION_MASK if ((pos & OpenHashSet.NONEXISTENCE_MASK) != 0) { // if first add diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java index 84940d96b563f..32844104c1deb 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkLauncher.java @@ -26,8 +26,11 @@ import java.util.Map; import java.util.concurrent.ThreadFactory; import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; import static org.apache.spark.launcher.CommandBuilderUtils.*; +import static org.apache.spark.launcher.CommandBuilderUtils.join; /** * Launcher for Spark applications. @@ -38,6 +41,8 @@ */ public class SparkLauncher extends AbstractLauncher { + private static final Logger LOG = Logger.getLogger(SparkLauncher.class.getName()); + /** The Spark master. */ public static final String SPARK_MASTER = "spark.master"; @@ -363,6 +368,9 @@ public SparkAppHandle startApplication(SparkAppHandle.Listener... 
listeners) thr String loggerName = getLoggerName(); ProcessBuilder pb = createBuilder(); + if (LOG.isLoggable(Level.FINE)) { + LOG.fine(String.format("Launching Spark application:%n%s", join(" ", pb.command()))); + } boolean outputToLog = outputStream == null; boolean errorToLog = !redirectErrorStream && errorStream == null; diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index 3479e0c3422bd..383c3f60a595b 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -348,7 +348,7 @@ private List buildPySparkShellCommand(Map env) throws IO } private List buildSparkRCommand(Map env) throws IOException { - if (!appArgs.isEmpty() && appArgs.get(0).endsWith(".R")) { + if (!appArgs.isEmpty() && (appArgs.get(0).endsWith(".R") || appArgs.get(0).endsWith(".r"))) { System.err.println( "Running R applications through 'sparkR' is not supported as of Spark 2.0.\n" + "Use ./bin/spark-submit "); @@ -390,9 +390,7 @@ boolean isClientMode(Map userProps) { String userMaster = firstNonEmpty(master, userProps.get(SparkLauncher.SPARK_MASTER)); String userDeployMode = firstNonEmpty(deployMode, userProps.get(SparkLauncher.DEPLOY_MODE)); // Default master is "local[*]", so assume client mode in that case - return userMaster == null || - "client".equals(userDeployMode) || - (!userMaster.equals("yarn-cluster") && userDeployMode == null); + return userMaster == null || userDeployMode == null || "client".equals(userDeployMode); } /** diff --git a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java index 32a91b1789412..752e8d4c23f8b 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/SparkSubmitCommandBuilderSuite.java @@ -250,6 +250,26 @@ public void testMissingAppResource() { new SparkSubmitCommandBuilder().buildSparkSubmitArgs(); } + @Test + public void testIsClientMode() { + // Default master is "local[*]" + SparkSubmitCommandBuilder builder = newCommandBuilder(Collections.emptyList()); + assertTrue("By default application run in local mode", + builder.isClientMode(Collections.emptyMap())); + // --master yarn or it can be any RM + List sparkSubmitArgs = Arrays.asList(parser.MASTER, "yarn"); + builder = newCommandBuilder(sparkSubmitArgs); + assertTrue("By default deploy mode is client", builder.isClientMode(Collections.emptyMap())); + // --master yarn and set spark.submit.deployMode to client + Map userProps = new HashMap<>(); + userProps.put("spark.submit.deployMode", "client"); + assertTrue(builder.isClientMode(userProps)); + // --master mesos --deploy-mode cluster + sparkSubmitArgs = Arrays.asList(parser.MASTER, "mesos", parser.DEPLOY_MODE, "cluster"); + builder = newCommandBuilder(sparkSubmitArgs); + assertFalse(builder.isClientMode(Collections.emptyMap())); + } + private void testCmdBuilder(boolean isDriver, boolean useDefaultPropertyFile) throws Exception { final String DRIVER_DEFAULT_PARAM = "-Ddriver-default"; final String DRIVER_EXTRA_PARAM = "-Ddriver-extra"; diff --git a/licenses-binary/LICENSE-JLargeArrays.txt b/licenses-binary/LICENSE-JLargeArrays.txt new file mode 100644 index 0000000000000..304e724556984 --- /dev/null +++ 
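
For reference, the simplified isClientMode rule above amounts to: client mode unless an explicit master is paired with an explicit non-client deploy mode. A minimal standalone sketch of that predicate, with the cases covered by the new testIsClientMode (names are illustrative and not part of the launcher API; the asserts are as one might check in a REPL):

def isClientMode(master: Option[String], deployMode: Option[String]): Boolean =
  master.isEmpty || deployMode.isEmpty || deployMode.contains("client")

assert(isClientMode(None, None))                    // default master local[*]
assert(isClientMode(Some("yarn"), None))            // deploy mode defaults to client
assert(isClientMode(Some("yarn"), Some("client")))
assert(!isClientMode(Some("mesos"), Some("cluster")))
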
b/licenses-binary/LICENSE-JLargeArrays.txt @@ -0,0 +1,23 @@ +JLargeArrays +Copyright (C) 2013 onward University of Warsaw, ICM +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/licenses-binary/LICENSE-JTransforms.txt b/licenses-binary/LICENSE-JTransforms.txt new file mode 100644 index 0000000000000..2f0589f76da7d --- /dev/null +++ b/licenses-binary/LICENSE-JTransforms.txt @@ -0,0 +1,23 @@ +JTransforms +Copyright (c) 2007 onward, Piotr Wendykier +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/licenses-binary/LICENSE-dnsjava.txt b/licenses-binary/LICENSE-dnsjava.txt new file mode 100644 index 0000000000000..70ee5b12ff23f --- /dev/null +++ b/licenses-binary/LICENSE-dnsjava.txt @@ -0,0 +1,24 @@ +Copyright (c) 1998-2011, Brian Wellington. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/licenses-binary/LICENSE-jtransforms.html b/licenses-binary/LICENSE-jtransforms.html deleted file mode 100644 index 351c17412357b..0000000000000 --- a/licenses-binary/LICENSE-jtransforms.html +++ /dev/null @@ -1,388 +0,0 @@ - - -Mozilla Public License version 1.1 - - - - -

[The deleted licenses-binary/LICENSE-jtransforms.html (388 lines of HTML) contained the full text of the Mozilla Public License Version 1.1, Sections 1 through 13 plus Exhibit A, naming Piotr Wendykier, Emory University as the Initial Developer of JTransforms and offering the GPL Version 2 or later and the LGPL Version 2.1 or later as alternative licenses. It is replaced in this patch by the BSD-style licenses-binary/LICENSE-JTransforms.txt added above.]
\ No newline at end of file diff --git a/licenses-binary/LICENSE-re2j.txt b/licenses-binary/LICENSE-re2j.txt new file mode 100644 index 0000000000000..0dc3cd70bf1f7 --- /dev/null +++ b/licenses-binary/LICENSE-re2j.txt @@ -0,0 +1,32 @@ +This is a work derived from Russ Cox's RE2 in Go, whose license +http://golang.org/LICENSE is as follows: + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala index 2a0f8c11d0a50..e054a15fc9b75 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala @@ -302,7 +302,7 @@ private[spark] object BLAS extends Serializable { * @param x the vector x that contains the n elements. * @param A the symmetric matrix A. Size of n x n. */ - def syr(alpha: Double, x: Vector, A: DenseMatrix) { + def syr(alpha: Double, x: Vector, A: DenseMatrix): Unit = { val mA = A.numRows val nA = A.numCols require(mA == nA, s"A is not a square matrix (and hence is not symmetric). 
A: $mA x $nA") @@ -316,7 +316,7 @@ private[spark] object BLAS extends Serializable { } } - private def syr(alpha: Double, x: DenseVector, A: DenseMatrix) { + private def syr(alpha: Double, x: DenseVector, A: DenseMatrix): Unit = { val nA = A.numRows val mA = A.numCols @@ -334,7 +334,7 @@ private[spark] object BLAS extends Serializable { } } - private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) { + private def syr(alpha: Double, x: SparseVector, A: DenseMatrix): Unit = { val mA = A.numCols val xIndices = x.indices val xValues = x.values diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 6e43d60bd03a3..f437d66cddb54 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -178,6 +178,14 @@ sealed trait Vector extends Serializable { */ @Since("2.0.0") def argmax: Int + + /** + * Calculate the dot product of this vector with another. + * + * If `size` does not match an [[IllegalArgumentException]] is thrown. + */ + @Since("3.0.0") + def dot(v: Vector): Double = BLAS.dot(this, v) } /** diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala index 332734bd28341..7d29d6dcea908 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/MatricesSuite.scala @@ -21,7 +21,7 @@ import java.util.Random import breeze.linalg.{CSCMatrix, Matrix => BM} import org.mockito.Mockito.when -import org.scalatest.mockito.MockitoSugar._ +import org.scalatestplus.mockito.MockitoSugar._ import scala.collection.mutable.{Map => MutableMap} import org.apache.spark.ml.SparkMLFunSuite diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index 0a316f57f811b..c97dc2c3c06f8 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -380,4 +380,27 @@ class VectorsSuite extends SparkMLFunSuite { Vectors.sparse(-1, Array((1, 2.0))) } } + + test("dot product only supports vectors of same size") { + val vSize4 = Vectors.dense(arr) + val vSize1 = Vectors.zeros(1) + intercept[IllegalArgumentException]{ vSize1.dot(vSize4) } + } + + test("dense vector dot product") { + val dv = Vectors.dense(arr) + assert(dv.dot(dv) === 0.26) + } + + test("sparse vector dot product") { + val sv = Vectors.sparse(n, indices, values) + assert(sv.dot(sv) === 0.26) + } + + test("mixed sparse and dense vector dot product") { + val sv = Vectors.sparse(n, indices, values) + val dv = Vectors.dense(arr) + assert(sv.dot(dv) === 0.26) + assert(dv.dot(sv) === 0.26) + } } diff --git a/mllib/benchmarks/UDTSerializationBenchmark-jdk11-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-jdk11-results.txt new file mode 100644 index 0000000000000..6f671405b8343 --- /dev/null +++ b/mllib/benchmarks/UDTSerializationBenchmark-jdk11-results.txt @@ -0,0 +1,12 @@ +================================================================================================ +VectorUDT de/serialization +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.4+11-LTS on Linux 
3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +serialize 269 292 13 0.0 269441.1 1.0X +deserialize 164 191 9 0.0 164314.6 1.6X + + diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt index 169f4c60c748e..a0c853e99014b 100644 --- a/mllib/benchmarks/UDTSerializationBenchmark-results.txt +++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt @@ -2,12 +2,11 @@ VectorUDT de/serialization ================================================================================================ -Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 -Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz - -VectorUDT de/serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -serialize 144 / 206 0.0 143979.7 1.0X -deserialize 114 / 135 0.0 113802.6 1.3X +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +serialize 271 294 12 0.0 271054.3 1.0X +deserialize 190 192 2 0.0 189706.1 1.4X diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 58815434cbdaf..9eac8ed22a3f6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml import org.apache.spark.annotation.{DeveloperApi, Since} -import org.apache.spark.ml.feature.LabeledPoint +import org.apache.spark.ml.feature.{Instance, LabeledPoint} import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -62,6 +62,39 @@ private[ml] trait PredictorParams extends Params } SchemaUtils.appendColumn(schema, $(predictionCol), DoubleType) } + + /** + * Extract [[labelCol]], weightCol(if any) and [[featuresCol]] from the given dataset, + * and put it in an RDD with strong types. + */ + protected def extractInstances(dataset: Dataset[_]): RDD[Instance] = { + val w = this match { + case p: HasWeightCol => + if (isDefined(p.weightCol) && $(p.weightCol).nonEmpty) { + col($(p.weightCol)).cast(DoubleType) + } else { + lit(1.0) + } + } + + dataset.select(col($(labelCol)).cast(DoubleType), w, col($(featuresCol))).rdd.map { + case Row(label: Double, weight: Double, features: Vector) => + Instance(label, weight, features) + } + } + + /** + * Extract [[labelCol]], weightCol(if any) and [[featuresCol]] from the given dataset, + * and put it in an RDD with strong types. + * Validate the output instances with the given function. 
+ */ + protected def extractInstances(dataset: Dataset[_], + validateInstance: Instance => Unit): RDD[Instance] = { + extractInstances(dataset).map { instance => + validateInstance(instance) + instance + } + } } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index b6b02e77909bd..9ac673078d4ad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.classification import org.apache.spark.SparkException import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} -import org.apache.spark.ml.feature.LabeledPoint +import org.apache.spark.ml.feature.{Instance, LabeledPoint} import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.shared.HasRawPredictionCol import org.apache.spark.ml.util.{MetadataUtils, SchemaUtils} @@ -42,6 +42,22 @@ private[spark] trait ClassifierParams val parentSchema = super.validateAndTransformSchema(schema, fitting, featuresDataType) SchemaUtils.appendColumn(parentSchema, $(rawPredictionCol), new VectorUDT) } + + /** + * Extract [[labelCol]], weightCol(if any) and [[featuresCol]] from the given dataset, + * and put it in an RDD with strong types. + * Validates the label on the classifier is a valid integer in the range [0, numClasses). + */ + protected def extractInstances(dataset: Dataset[_], + numClasses: Int): RDD[Instance] = { + val validateInstance = (instance: Instance) => { + val label = instance.label + require(label.toLong == label && label >= 0 && label < numClasses, s"Classifier was given" + + s" dataset with invalid label $label. 
Labels must be integers in range" + + s" [0, $numClasses).") + } + extractInstances(dataset, validateInstance) + } } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 6bd8a26f5b1a8..2d0212f36fad4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -22,7 +22,7 @@ import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since -import org.apache.spark.ml.feature.{Instance, LabeledPoint} +import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree._ @@ -34,9 +34,8 @@ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeModel} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.apache.spark.sql.functions.{col, lit, udf} -import org.apache.spark.sql.types.DoubleType +import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.sql.functions.{col, udf} /** * Decision tree learning algorithm (http://en.wikipedia.org/wiki/Decision_tree_learning) @@ -116,9 +115,8 @@ class DecisionTreeClassifier @Since("1.4.0") ( dataset: Dataset[_]): DecisionTreeClassificationModel = instrumented { instr => instr.logPipelineStage(this) instr.logDataset(dataset) - val categoricalFeatures: Map[Int, Int] = - MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) - val numClasses: Int = getNumClasses(dataset) + val categoricalFeatures = MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) + val numClasses = getNumClasses(dataset) if (isDefined(thresholds)) { require($(thresholds).length == numClasses, this.getClass.getSimpleName + @@ -126,13 +124,7 @@ class DecisionTreeClassifier @Since("1.4.0") ( s" numClasses=$numClasses, but thresholds has length ${$(thresholds).length}") } validateNumClasses(numClasses) - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances = - dataset.select(col($(labelCol)).cast(DoubleType), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - validateLabel(label, numClasses) - Instance(label, weight, features) - } + val instances = extractInstances(dataset, numClasses) val strategy = getOldStrategy(categoricalFeatures, numClasses) instr.logNumClasses(numClasses) instr.logParams(this, labelCol, featuresCol, predictionCol, rawPredictionCol, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 78503585261bf..e467228b4cc14 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -36,9 +36,7 @@ import org.apache.spark.ml.util._ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer -import org.apache.spark.rdd.RDD import 
org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.functions.{col, lit} /** Params for linear SVM Classifier. */ private[classification] trait LinearSVCParams extends ClassifierParams with HasRegParam @@ -161,12 +159,7 @@ class LinearSVC @Since("2.2.0") ( override def copy(extra: ParamMap): LinearSVC = defaultCopy(extra) override protected def train(dataset: Dataset[_]): LinearSVCModel = instrumented { instr => - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances: RDD[Instance] = - dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } + val instances = extractInstances(dataset) instr.logPipelineStage(this) instr.logDataset(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 0997c1e7b38d6..af6e2b39ecb60 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -40,9 +40,8 @@ import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, Multiclas import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.mllib.util.MLUtils -import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset, Row} -import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.functions.col import org.apache.spark.sql.types.{DataType, DoubleType, StructType} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.VersionUtils @@ -492,12 +491,7 @@ class LogisticRegression @Since("1.2.0") ( protected[spark] def train( dataset: Dataset[_], handlePersistence: Boolean): LogisticRegressionModel = instrumented { instr => - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances: RDD[Instance] = - dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } + val instances = extractInstances(dataset) if (handlePersistence) instances.persist(StorageLevel.MEMORY_AND_DISK) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 47b8a8df637b9..41db6f3f44342 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.ml.ann.{FeedForwardTopology, FeedForwardTrainer} import org.apache.spark.ml.feature.OneHotEncoderModel -import org.apache.spark.ml.linalg.{Vector, Vectors} +import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index e97af0582d358..205f565aa2685 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.ml.PredictorParams +import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.{DoubleParam, Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.HasWeightCol @@ -28,7 +29,7 @@ import org.apache.spark.ml.util._ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.util.MLUtils import org.apache.spark.sql.{Dataset, Row} -import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.functions.col /** * Params for Naive Bayes Classifiers. @@ -137,17 +138,14 @@ class NaiveBayes @Since("1.5.0") ( s" numClasses=$numClasses, but thresholds has length ${$(thresholds).length}") } - val modelTypeValue = $(modelType) - val requireValues: Vector => Unit = { - modelTypeValue match { - case Multinomial => - requireNonnegativeValues - case Bernoulli => - requireZeroOneBernoulliValues - case _ => - // This should never happen. - throw new IllegalArgumentException(s"Invalid modelType: ${$(modelType)}.") - } + val validateInstance = $(modelType) match { + case Multinomial => + (instance: Instance) => requireNonnegativeValues(instance.features) + case Bernoulli => + (instance: Instance) => requireZeroOneBernoulliValues(instance.features) + case _ => + // This should never happen. + throw new IllegalArgumentException(s"Invalid modelType: ${$(modelType)}.") } instr.logParams(this, labelCol, featuresCol, weightCol, predictionCol, rawPredictionCol, @@ -155,17 +153,15 @@ class NaiveBayes @Since("1.5.0") ( val numFeatures = dataset.select(col($(featuresCol))).head().getAs[Vector](0).size instr.logNumFeatures(numFeatures) - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) // Aggregates term frequencies per label. // TODO: Calling aggregateByKey and collect creates two stages, we can implement something // TODO: similar to reduceByKeyLocally to save one stage. 
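The refactor below keys each instance by its label and folds (weight, features) pairs into per-label sums. A minimal, self-contained sketch of the same aggregation pattern, written against public APIs only; the input RDD of (label, weight, features) triples and numFeatures are assumed inputs, not part of this change.

    import org.apache.spark.ml.linalg.Vector
    import org.apache.spark.rdd.RDD

    def aggregateByLabel(data: RDD[(Double, Double, Vector)], numFeatures: Int) = {
      data
        .map { case (label, weight, features) => (label, (weight, features)) }
        .aggregateByKey[(Double, Array[Double], Long)](
          (0.0, Array.fill(numFeatures)(0.0), 0L))(
          seqOp = { case ((wSum, fSum, cnt), (w, f)) =>
            // accumulate the weighted feature vector into fSum
            f.foreachActive { (i, v) => fSum(i) += w * v }
            (wSum + w, fSum, cnt + 1L)
          },
          combOp = { case ((w1, f1, c1), (w2, f2, c2)) =>
            var i = 0
            while (i < f1.length) { f1(i) += f2(i); i += 1 }
            (w1 + w2, f1, c1 + c2)
          })
        .collectAsMap()
    }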
- val aggregated = dataset.select(col($(labelCol)), w, col($(featuresCol))).rdd - .map { row => (row.getDouble(0), (row.getDouble(1), row.getAs[Vector](2))) - }.aggregateByKey[(Double, DenseVector, Long)]((0.0, Vectors.zeros(numFeatures).toDense, 0L))( + val aggregated = extractInstances(dataset, validateInstance).map { instance => + (instance.label, (instance.weight, instance.features)) + }.aggregateByKey[(Double, DenseVector, Long)]((0.0, Vectors.zeros(numFeatures).toDense, 0L))( seqOp = { case ((weightSum, featureSum, count), (weight, features)) => - requireValues(features) BLAS.axpy(weight, features, featureSum) (weightSum + weight, featureSum, count + 1) }, diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 86caa1247e77f..979eb5e5448a8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -33,8 +33,8 @@ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.linalg.{Matrices => OldMatrices, Matrix => OldMatrix, Vector => OldVector, Vectors => OldVectors} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Column, DataFrame, Dataset, Row, SparkSession} -import org.apache.spark.sql.functions.udf +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} +import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{IntegerType, StructType} import org.apache.spark.storage.StorageLevel @@ -111,28 +111,32 @@ class GaussianMixtureModel private[ml] ( override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) - var predictionColNames = Seq.empty[String] - var predictionColumns = Seq.empty[Column] - - if ($(predictionCol).nonEmpty) { - val predUDF = udf((vector: Vector) => predict(vector)) - predictionColNames :+= $(predictionCol) - predictionColumns :+= predUDF(DatasetUtils.columnToVector(dataset, getFeaturesCol)) - } + val vectorCol = DatasetUtils.columnToVector(dataset, $(featuresCol)) + var outputData = dataset + var numColsOutput = 0 if ($(probabilityCol).nonEmpty) { val probUDF = udf((vector: Vector) => predictProbability(vector)) - predictionColNames :+= $(probabilityCol) - predictionColumns :+= probUDF(DatasetUtils.columnToVector(dataset, getFeaturesCol)) + outputData = outputData.withColumn($(probabilityCol), probUDF(vectorCol)) + numColsOutput += 1 + } + + if ($(predictionCol).nonEmpty) { + if ($(probabilityCol).nonEmpty) { + val predUDF = udf((vector: Vector) => vector.argmax) + outputData = outputData.withColumn($(predictionCol), predUDF(col($(probabilityCol)))) + } else { + val predUDF = udf((vector: Vector) => predict(vector)) + outputData = outputData.withColumn($(predictionCol), predUDF(vectorCol)) + } + numColsOutput += 1 } - if (predictionColNames.nonEmpty) { - dataset.withColumns(predictionColNames, predictionColumns) - } else { + if (numColsOutput == 0) { this.logWarning(s"$uid: GaussianMixtureModel.transform() does nothing" + " because no output columns were set.") - dataset.toDF() } + outputData.toDF } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala index 2a7b3c579b078..09e8e7b232f3a 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/BinaryClassificationEvaluator.scala @@ -59,6 +59,28 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va @Since("1.2.0") def setMetricName(value: String): this.type = set(metricName, value) + /** + * param for number of bins to down-sample the curves (ROC curve, PR curve) in area + * computation. If 0, no down-sampling will occur. + * Default: 1000. + * @group expertParam + */ + @Since("3.0.0") + val numBins: IntParam = new IntParam(this, "numBins", "Number of bins to down-sample " + + "the curves (ROC curve, PR curve) in area computation. If 0, no down-sampling will occur. " + + "Must be >= 0.", + ParamValidators.gtEq(0)) + + /** @group expertGetParam */ + @Since("3.0.0") + def getNumBins: Int = $(numBins) + + /** @group expertSetParam */ + @Since("3.0.0") + def setNumBins(value: Int): this.type = set(numBins, value) + + setDefault(numBins -> 1000) + /** @group setParam */ @Since("1.5.0") def setRawPredictionCol(value: String): this.type = set(rawPredictionCol, value) @@ -94,7 +116,7 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va case Row(rawPrediction: Double, label: Double, weight: Double) => (rawPrediction, label, weight) } - val metrics = new BinaryClassificationMetrics(scoreAndLabelsWithWeights) + val metrics = new BinaryClassificationMetrics(scoreAndLabelsWithWeights, $(numBins)) val metric = $(metricName) match { case "areaUnderROC" => metrics.areaUnderROC() case "areaUnderPR" => metrics.areaUnderPR() @@ -104,10 +126,7 @@ class BinaryClassificationEvaluator @Since("1.4.0") (@Since("1.4.0") override va } @Since("1.5.0") - override def isLargerBetter: Boolean = $(metricName) match { - case "areaUnderROC" => true - case "areaUnderPR" => true - } + override def isLargerBetter: Boolean = true @Since("1.4.1") override def copy(extra: ParamMap): BinaryClassificationEvaluator = defaultCopy(extra) diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala index dd667a85fa598..b0cafefe420a3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/RegressionEvaluator.scala @@ -18,7 +18,7 @@ package org.apache.spark.ml.evaluation import org.apache.spark.annotation.Since -import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} +import org.apache.spark.ml.param.{BooleanParam, Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol, HasWeightCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} import org.apache.spark.mllib.evaluation.RegressionMetrics @@ -43,13 +43,14 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui * - `"mse"`: mean squared error * - `"r2"`: R^2^ metric * - `"mae"`: mean absolute error + * - `"var"`: explained variance * * @group param */ @Since("1.4.0") val metricName: Param[String] = { - val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae")) - new Param(this, "metricName", "metric name in evaluation (mse|rmse|r2|mae)", allowedParams) + val allowedParams = ParamValidators.inArray(Array("mse", "rmse", "r2", "mae", "var")) + new Param(this, "metricName", "metric name in evaluation 
(mse|rmse|r2|mae|var)", allowedParams) } /** @group getParam */ @@ -60,6 +61,25 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui @Since("1.4.0") def setMetricName(value: String): this.type = set(metricName, value) + /** + * param for whether the regression is through the origin. + * Default: false. + * @group expertParam + */ + @Since("3.0.0") + val throughOrigin: BooleanParam = new BooleanParam(this, "throughOrigin", + "Whether the regression is through the origin.") + + /** @group expertGetParam */ + @Since("3.0.0") + def getThroughOrigin: Boolean = $(throughOrigin) + + /** @group expertSetParam */ + @Since("3.0.0") + def setThroughOrigin(value: Boolean): this.type = set(throughOrigin, value) + + setDefault(throughOrigin -> false) + /** @group setParam */ @Since("1.4.0") def setPredictionCol(value: String): this.type = set(predictionCol, value) @@ -86,22 +106,20 @@ final class RegressionEvaluator @Since("1.4.0") (@Since("1.4.0") override val ui .rdd .map { case Row(prediction: Double, label: Double, weight: Double) => (prediction, label, weight) } - val metrics = new RegressionMetrics(predictionAndLabelsWithWeights) - val metric = $(metricName) match { + val metrics = new RegressionMetrics(predictionAndLabelsWithWeights, $(throughOrigin)) + $(metricName) match { case "rmse" => metrics.rootMeanSquaredError case "mse" => metrics.meanSquaredError case "r2" => metrics.r2 case "mae" => metrics.meanAbsoluteError + case "var" => metrics.explainedVariance } - metric } @Since("1.4.0") override def isLargerBetter: Boolean = $(metricName) match { - case "rmse" => false - case "mse" => false - case "r2" => true - case "mae" => false + case "r2" | "var" => true + case _ => false } @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 2b0862c60fdf7..c4daf64dfc5f0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -75,30 +75,40 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) val schema = dataset.schema val inputType = schema($(inputCol)).dataType val td = $(threshold) + val metadata = outputSchema($(outputCol)).metadata - val binarizerDouble = udf { in: Double => if (in > td) 1.0 else 0.0 } - val binarizerVector = udf { (data: Vector) => - val indices = ArrayBuilder.make[Int] - val values = ArrayBuilder.make[Double] - - data.foreachActive { (index, value) => - if (value > td) { - indices += index - values += 1.0 + val binarizerUDF = inputType match { + case DoubleType => + udf { in: Double => if (in > td) 1.0 else 0.0 } + + case _: VectorUDT if td >= 0 => + udf { vector: Vector => + val indices = ArrayBuilder.make[Int] + val values = ArrayBuilder.make[Double] + vector.foreachActive { (index, value) => + if (value > td) { + indices += index + values += 1.0 + } + } + Vectors.sparse(vector.size, indices.result(), values.result()).compressed } - } - Vectors.sparse(data.size, indices.result(), values.result()).compressed + case _: VectorUDT if td < 0 => + this.logWarning(s"Binarization operations on sparse dataset with negative threshold " + + s"$td will build a dense output, so take care when applying to sparse input.") + udf { vector: Vector => + val values = Array.fill(vector.size)(1.0) + vector.foreachActive { (index, value) => + if (value <= td) { + values(index) = 0.0 + } + } + Vectors.dense(values).compressed + } } - val 
metadata = outputSchema($(outputCol)).metadata - - inputType match { - case DoubleType => - dataset.select(col("*"), binarizerDouble(col($(inputCol))).as($(outputCol), metadata)) - case _: VectorUDT => - dataset.select(col("*"), binarizerVector(col($(inputCol))).as($(outputCol), metadata)) - } + dataset.withColumn($(outputCol), binarizerUDF(col($(inputCol))), metadata) } @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index 32d98151bdcff..84d6a536ccca8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import edu.emory.mathcs.jtransforms.dct._ +import org.jtransforms.dct._ import org.apache.spark.annotation.Since import org.apache.spark.ml.UnaryTransformer diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 5bfaa3b7f3f52..f7a83cdd41a90 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -167,25 +167,38 @@ final class QuantileDiscretizer @Since("1.6.0") (@Since("1.6.0") override val ui @Since("2.3.0") def setOutputCols(value: Array[String]): this.type = set(outputCols, value) - private[feature] def getInOutCols: (Array[String], Array[String]) = { - require((isSet(inputCol) && isSet(outputCol) && !isSet(inputCols) && !isSet(outputCols)) || - (!isSet(inputCol) && !isSet(outputCol) && isSet(inputCols) && isSet(outputCols)), - "QuantileDiscretizer only supports setting either inputCol/outputCol or" + - "inputCols/outputCols." - ) + @Since("1.6.0") + override def transformSchema(schema: StructType): StructType = { + ParamValidators.checkSingleVsMultiColumnParams(this, Seq(outputCol), + Seq(outputCols)) if (isSet(inputCol)) { - (Array($(inputCol)), Array($(outputCol))) - } else { - require($(inputCols).length == $(outputCols).length, - "inputCols number do not match outputCols") - ($(inputCols), $(outputCols)) + require(!isSet(numBucketsArray), + s"numBucketsArray can't be set for single-column QuantileDiscretizer.") } - } - @Since("1.6.0") - override def transformSchema(schema: StructType): StructType = { - val (inputColNames, outputColNames) = getInOutCols + if (isSet(inputCols)) { + require(getInputCols.length == getOutputCols.length, + s"QuantileDiscretizer $this has mismatched Params " + + s"for multi-column transform. Params (inputCols, outputCols) should have " + + s"equal lengths, but they have different lengths: " + + s"(${getInputCols.length}, ${getOutputCols.length}).") + if (isSet(numBucketsArray)) { + require(getInputCols.length == getNumBucketsArray.length, + s"QuantileDiscretizer $this has mismatched Params " + + s"for multi-column transform. Params (inputCols, outputCols, numBucketsArray) " + + s"should have equal lengths, but they have different lengths: " + + s"(${getInputCols.length}, ${getOutputCols.length}, ${getNumBucketsArray.length}).") + require(!isSet(numBuckets), + s"exactly one of numBuckets, numBucketsArray Params to be set, but both are set." 
) + } + } + + val (inputColNames, outputColNames) = if (isSet(inputCols)) { + ($(inputCols).toSeq, $(outputCols).toSeq) + } else { + (Seq($(inputCol)), Seq($(outputCol))) + } val existingFields = schema.fields var outputFields = existingFields inputColNames.zip(outputColNames).foreach { case (inputColName, outputColName) => diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 6c0d5fc70ab4e..df7d17059980b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -392,7 +392,7 @@ class RFormulaModel private[feature]( } } - private def checkCanTransform(schema: StructType) { + private def checkCanTransform(schema: StructType): Unit = { val columnNames = schema.map(_.name) require(!columnNames.contains($(featuresCol)), "Features column already exists.") require( diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index fb7334d41ba44..bf6e8ec8f37b8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.json4s.JsonDSL._ -import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext} +import org.apache.spark.{Dependency, Partitioner, ShuffleDependency, SparkContext, SparkException} import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.ml.{Estimator, Model} @@ -42,7 +42,7 @@ import org.apache.spark.ml.util._ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.linalg.CholeskyDecomposition import org.apache.spark.mllib.optimization.NNLS -import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{DeterministicLevel, RDD} import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ @@ -564,6 +564,13 @@ object ALSModel extends MLReadable[ALSModel] { * r is greater than 0 and 0 if r is less than or equal to 0. The ratings then act as 'confidence' * values related to strength of indicated user * preferences rather than explicit ratings given to items. + * + * Note: the input rating dataset to the ALS implementation should be deterministic. + * Nondeterministic data can cause failure during fitting ALS model. + * For example, an order-sensitive operation like sampling after a repartition makes dataset + * output nondeterministic, like `dataset.repartition(2).sample(false, 0.5, 1618)`. + * Checkpointing sampled dataset or adding a sort before sampling can help make the dataset + * deterministic. */ @Since("1.3.0") class ALS(@Since("1.4.0") override val uid: String) extends Estimator[ALSModel] with ALSParams @@ -794,7 +801,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { * Given a triangular matrix in the order of fillXtX above, compute the full symmetric square * matrix that it represents, storing it into destMatrix. 
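Tying the ALS determinism note above to code: a short sketch of the two suggested workarounds, sorting before the sample and checkpointing the sampled data. The ratings DataFrame, its column names, the SparkSession value, and the checkpoint directory are assumptions for illustration only.

    // Option 1: sort on a stable key after the repartition so the sampled
    // rows are reproducible across reruns.
    val sortedSample = ratings
      .repartition(2)
      .sortWithinPartitions("userId", "itemId")
      .sample(false, 0.5, 1618)

    // Option 2: checkpoint the sampled data so a rerun reuses the
    // materialized rows instead of recomputing a nondeterministic lineage.
    spark.sparkContext.setCheckpointDir("/tmp/als-checkpoint")  // assumed path
    val checkpointedSample = ratings
      .repartition(2)
      .sample(false, 0.5, 1618)
      .checkpoint()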
*/ - private def fillAtA(triAtA: Array[Double], lambda: Double) { + private def fillAtA(triAtA: Array[Double], lambda: Double): Unit = { var i = 0 var pos = 0 var a = 0.0 @@ -1666,6 +1673,13 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { } } val merged = srcOut.groupByKey(new ALSPartitioner(dstInBlocks.partitions.length)) + + // SPARK-28927: Nondeterministic RDDs causes inconsistent in/out blocks in case of rerun. + // It can cause runtime error when matching in/out user/item blocks. + val isBlockRDDNondeterministic = + dstInBlocks.outputDeterministicLevel == DeterministicLevel.INDETERMINATE || + srcOutBlocks.outputDeterministicLevel == DeterministicLevel.INDETERMINATE + dstInBlocks.join(merged).mapValues { case (InBlock(dstIds, srcPtrs, srcEncodedIndices, ratings), srcFactors) => val sortedSrcFactors = new Array[FactorBlock](numSrcBlocks) @@ -1686,7 +1700,19 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { val encoded = srcEncodedIndices(i) val blockId = srcEncoder.blockId(encoded) val localIndex = srcEncoder.localIndex(encoded) - val srcFactor = sortedSrcFactors(blockId)(localIndex) + var srcFactor: Array[Float] = null + try { + srcFactor = sortedSrcFactors(blockId)(localIndex) + } catch { + case a: ArrayIndexOutOfBoundsException if isBlockRDDNondeterministic => + val errMsg = "A failure detected when matching In/Out blocks of users/items. " + + "Because at least one In/Out block RDD is found to be nondeterministic now, " + + "the issue is probably caused by nondeterministic input data. You can try to " + + "checkpoint training data to make it deterministic. If you do `repartition` + " + + "`sample` or `randomSplit`, you can also try to sort it before `sample` or " + + "`randomSplit` to make it deterministic." + throw new SparkException(errMsg, a) + } val rating = ratings(i) if (implicitPrefs) { // Extension to the original paper to handle rating < 0. 
confidence is a function diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 106be1b78af47..602b5fac20d3b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -23,7 +23,7 @@ import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since import org.apache.spark.ml.{PredictionModel, Predictor} -import org.apache.spark.ml.feature.{Instance, LabeledPoint} +import org.apache.spark.ml.feature.LabeledPoint import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree._ @@ -34,9 +34,8 @@ import org.apache.spark.ml.util.Instrumentation.instrumented import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} import org.apache.spark.mllib.tree.model.{DecisionTreeModel => OldDecisionTreeModel} import org.apache.spark.rdd.RDD -import org.apache.spark.sql.{Column, DataFrame, Dataset, Row} +import org.apache.spark.sql.{Column, DataFrame, Dataset} import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.DoubleType /** @@ -118,12 +117,7 @@ class DecisionTreeRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S dataset: Dataset[_]): DecisionTreeRegressionModel = instrumented { instr => val categoricalFeatures: Map[Int, Int] = MetadataUtils.getCategoricalFeatures(dataset.schema($(featuresCol))) - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances = - dataset.select(col($(labelCol)).cast(DoubleType), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } + val instances = extractInstances(dataset) val strategy = getOldStrategy(categoricalFeatures) instr.logPipelineStage(this) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index a226ca49e6deb..4dc0c247ce331 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1036,31 +1036,33 @@ class GeneralizedLinearRegressionModel private[ml] ( } override protected def transformImpl(dataset: Dataset[_]): DataFrame = { - var predictionColNames = Seq.empty[String] - var predictionColumns = Seq.empty[Column] - val offset = if (!hasOffsetCol) lit(0.0) else col($(offsetCol)).cast(DoubleType) + var outputData = dataset + var numColsOutput = 0 - if ($(predictionCol).nonEmpty) { - val predictUDF = udf { (features: Vector, offset: Double) => predict(features, offset) } - predictionColNames :+= $(predictionCol) - predictionColumns :+= predictUDF(col($(featuresCol)), offset) + if (hasLinkPredictionCol) { + val predLinkUDF = udf((features: Vector, offset: Double) => predictLink(features, offset)) + outputData = outputData + .withColumn($(linkPredictionCol), predLinkUDF(col($(featuresCol)), offset)) + numColsOutput += 1 } - if (hasLinkPredictionCol) { - val predictLinkUDF = - udf { (features: Vector, offset: Double) => predictLink(features, offset) } - predictionColNames :+= $(linkPredictionCol) - predictionColumns :+= predictLinkUDF(col($(featuresCol)), offset) + if ($(predictionCol).nonEmpty) { + if 
(hasLinkPredictionCol) { + val predUDF = udf((eta: Double) => familyAndLink.fitted(eta)) + outputData = outputData.withColumn($(predictionCol), predUDF(col($(linkPredictionCol)))) + } else { + val predUDF = udf((features: Vector, offset: Double) => predict(features, offset)) + outputData = outputData.withColumn($(predictionCol), predUDF(col($(featuresCol)), offset)) + } + numColsOutput += 1 } - if (predictionColNames.nonEmpty) { - dataset.withColumns(predictionColNames, predictionColumns) - } else { + if (numColsOutput == 0) { this.logWarning(s"$uid: GeneralizedLinearRegressionModel.transform() does nothing" + " because no output columns were set.") - dataset.toDF() } + outputData.toDF } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index abf75d70ea028..4c600eac26b37 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -43,7 +43,6 @@ import org.apache.spark.mllib.linalg.VectorImplicits._ import org.apache.spark.mllib.regression.{LinearRegressionModel => OldLinearRegressionModel} import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.mllib.util.MLUtils -import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} @@ -320,13 +319,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String override protected def train(dataset: Dataset[_]): LinearRegressionModel = instrumented { instr => // Extract the number of features before deciding optimization solver. val numFeatures = dataset.select(col($(featuresCol))).first().getAs[Vector](0).size - val w = if (!isDefined(weightCol) || $(weightCol).isEmpty) lit(1.0) else col($(weightCol)) - val instances: RDD[Instance] = dataset.select( - col($(labelCol)), w, col($(featuresCol))).rdd.map { - case Row(label: Double, weight: Double, features: Vector) => - Instance(label, weight, features) - } + val instances = extractInstances(dataset) instr.logPipelineStage(this) instr.logDataset(dataset) diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala index c0a1683d3cb6f..314cf422be87e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala @@ -28,7 +28,8 @@ import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} * @tparam Learner Concrete Estimator type * @tparam M Concrete Model type */ -private[spark] abstract class Regressor[ +@DeveloperApi +abstract class Regressor[ FeaturesType, Learner <: Regressor[FeaturesType, Learner, M], M <: RegressionModel[FeaturesType, M]] diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala index 8f8a17171f980..6c194902a750b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala @@ -90,7 +90,7 @@ private[spark] class DecisionTreeMetadata( * Set number of splits for a continuous feature. 
* For a continuous feature, number of bins is number of splits plus 1. */ - def setNumSplits(featureIndex: Int, numSplits: Int) { + def setNumSplits(featureIndex: Int, numSplits: Int): Unit = { require(isContinuous(featureIndex), s"Only number of bin for a continuous feature can be set.") numBins(featureIndex) = numSplits + 1 diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala index 8cd4a7ca9493b..58a763257af20 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala @@ -205,21 +205,21 @@ private[spark] class OptionalInstrumentation private( protected override def logName: String = className - override def logInfo(msg: => String) { + override def logInfo(msg: => String): Unit = { instrumentation match { case Some(instr) => instr.logInfo(msg) case None => super.logInfo(msg) } } - override def logWarning(msg: => String) { + override def logWarning(msg: => String): Unit = { instrumentation match { case Some(instr) => instr.logWarning(msg) case None => super.logWarning(msg) } } - override def logError(msg: => String) { + override def logError(msg: => String): Unit = { instrumentation match { case Some(instr) => instr.logError(msg) case None => super.logError(msg) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 4617073f9decd..bafaafb720ed8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -347,7 +347,6 @@ private[python] class PythonMLLibAPI extends Serializable { data: JavaRDD[Vector], k: Int, maxIterations: Int, - runs: Int, initializationMode: String, seed: java.lang.Long, initializationSteps: Int, @@ -1312,7 +1311,7 @@ private[spark] abstract class SerDeBase { } } - private[python] def saveState(obj: Object, out: OutputStream, pickler: Pickler) + private[python] def saveState(obj: Object, out: OutputStream, pickler: Pickler): Unit } def dumps(obj: AnyRef): Array[Byte] = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index d86aa01c9195a..df888bc3d5d51 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -224,117 +224,11 @@ class LogisticRegressionWithSGD private[mllib] ( .setMiniBatchFraction(miniBatchFraction) override protected val validators = List(DataValidators.binaryLabelValidator) - /** - * Construct a LogisticRegression object with default parameters: {stepSize: 1.0, - * numIterations: 100, regParm: 0.01, miniBatchFraction: 1.0}. - */ - @Since("0.8.0") - @deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0") - def this() = this(1.0, 100, 0.01, 1.0) - override protected[mllib] def createModel(weights: Vector, intercept: Double) = { new LogisticRegressionModel(weights, intercept) } } -/** - * Top-level methods for calling Logistic Regression using Stochastic Gradient Descent. 
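The deprecation notes on the removed LogisticRegressionWithSGD helpers point callers to LogisticRegressionWithLBFGS or ml.classification.LogisticRegression. A hedged migration sketch; the points RDD and the training DataFrame with "label"/"features" columns are assumed inputs.

    import org.apache.spark.ml.classification.LogisticRegression
    import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.rdd.RDD
    import org.apache.spark.sql.DataFrame

    // RDD-based replacement: L-BFGS instead of SGD, same LabeledPoint input.
    def trainWithLBFGS(points: RDD[LabeledPoint]) =
      new LogisticRegressionWithLBFGS().setNumClasses(2).run(points)

    // DataFrame-based replacement in spark.ml.
    def trainWithML(training: DataFrame) =
      new LogisticRegression().setMaxIter(100).setRegParam(0.01).fit(training)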
- * - * @note Labels used in Logistic Regression should be {0, 1} - */ -@Since("0.8.0") -@deprecated("Use ml.classification.LogisticRegression or LogisticRegressionWithLBFGS", "2.0.0") -object LogisticRegressionWithSGD { - // NOTE(shivaram): We use multiple train methods instead of default arguments to support - // Java programs. - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed - * number of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate the gradient. The weights used in - * gradient descent are initialized using the initial weights provided. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. - * @param initialWeights Initial set of weights to be used. Array should be equal in size to - * the number of features in the data. - * - * @note Labels used in Logistic Regression should be {0, 1} - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double, - initialWeights: Vector): LogisticRegressionModel = { - new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction) - .run(input, initialWeights) - } - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed - * number of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate the gradient. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. - * - * @note Labels used in Logistic Regression should be {0, 1} - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): LogisticRegressionModel = { - new LogisticRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction) - .run(input) - } - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed - * number of iterations of gradient descent using the specified step size. We use the entire data - * set to update the gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. - * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param numIterations Number of iterations of gradient descent to run. - * @return a LogisticRegressionModel which has the weights and offset from training. - * - * @note Labels used in Logistic Regression should be {0, 1} - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double): LogisticRegressionModel = { - train(input, numIterations, stepSize, 1.0) - } - - /** - * Train a logistic regression model given an RDD of (label, features) pairs. We run a fixed - * number of iterations of gradient descent using a step size of 1.0. We use the entire data set - * to update the gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. 
- * @param numIterations Number of iterations of gradient descent to run. - * @return a LogisticRegressionModel which has the weights and offset from training. - * - * @note Labels used in Logistic Regression should be {0, 1} - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int): LogisticRegressionModel = { - train(input, numIterations, 1.0, 1.0) - } -} - /** * Train a classification model for Multinomial/Binary Logistic Regression using * Limited-memory BFGS. Standard feature scaling and L2 regularization are used by default. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index 4bb79bc69eef4..278d61d916735 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -479,58 +479,6 @@ object KMeans { .run(data) } - /** - * Trains a k-means model using the given set of parameters. - * - * @param data Training points as an `RDD` of `Vector` types. - * @param k Number of clusters to create. - * @param maxIterations Maximum number of iterations allowed. - * @param runs This param has no effect since Spark 2.0.0. - * @param initializationMode The initialization algorithm. This can either be "random" or - * "k-means||". (default: "k-means||") - * @param seed Random seed for cluster initialization. Default is to generate seed based - * on system time. - */ - @Since("1.3.0") - @deprecated("Use train method without 'runs'", "2.1.0") - def train( - data: RDD[Vector], - k: Int, - maxIterations: Int, - runs: Int, - initializationMode: String, - seed: Long): KMeansModel = { - new KMeans().setK(k) - .setMaxIterations(maxIterations) - .setInitializationMode(initializationMode) - .setSeed(seed) - .run(data) - } - - /** - * Trains a k-means model using the given set of parameters. - * - * @param data Training points as an `RDD` of `Vector` types. - * @param k Number of clusters to create. - * @param maxIterations Maximum number of iterations allowed. - * @param runs This param has no effect since Spark 2.0.0. - * @param initializationMode The initialization algorithm. This can either be "random" or - * "k-means||". (default: "k-means||") - */ - @Since("0.8.0") - @deprecated("Use train method without 'runs'", "2.1.0") - def train( - data: RDD[Vector], - k: Int, - maxIterations: Int, - runs: Int, - initializationMode: String): KMeansModel = { - new KMeans().setK(k) - .setMaxIterations(maxIterations) - .setInitializationMode(initializationMode) - .run(data) - } - /** * Trains a k-means model using specified parameters and the default values for unspecified. */ @@ -544,21 +492,6 @@ object KMeans { .run(data) } - /** - * Trains a k-means model using specified parameters and the default values for unspecified. 
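Since the runs parameter has had no effect since Spark 2.0.0, callers of the removed overloads can simply drop that argument and use the remaining train variants; a brief sketch, where the data RDD is an assumed input.

    import org.apache.spark.mllib.clustering.KMeans
    import org.apache.spark.mllib.linalg.Vector
    import org.apache.spark.rdd.RDD

    // Before: KMeans.train(data, k, maxIterations, runs, initializationMode)
    // After: the same call without `runs`.
    def cluster(data: RDD[Vector]) =
      KMeans.train(data, k = 10, maxIterations = 20,
        initializationMode = KMeans.K_MEANS_PARALLEL)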
- */ - @Since("0.8.0") - @deprecated("Use train method without 'runs'", "2.1.0") - def train( - data: RDD[Vector], - k: Int, - maxIterations: Int, - runs: Int): KMeansModel = { - new KMeans().setK(k) - .setMaxIterations(maxIterations) - .run(data) - } - private[spark] def validateInitMode(initMode: String): Boolean = { initMode match { case KMeans.RANDOM => true diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index ff4ca0ac40fe2..c7d44e8752cd9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -269,7 +269,7 @@ class StreamingKMeans @Since("1.2.0") ( * @param data DStream containing vector data */ @Since("1.2.0") - def trainOn(data: DStream[Vector]) { + def trainOn(data: DStream[Vector]): Unit = { assertInitialized() data.foreachRDD { (rdd, time) => model = model.update(rdd, decayFactor, timeUnit) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index d34a7ca6c9c7f..f4e2040569f48 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -81,7 +81,7 @@ class BinaryClassificationMetrics @Since("3.0.0") ( * Unpersist intermediate RDDs used in the computation. */ @Since("1.0.0") - def unpersist() { + def unpersist(): Unit = { cumulativeCounts.unpersist() } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 82f5b279846ba..b771e077b02ac 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -44,17 +44,6 @@ class ChiSqSelectorModel @Since("1.3.0") ( private val filterIndices = selectedFeatures.sorted - @deprecated("not intended for subclasses to use", "2.1.0") - protected def isSorted(array: Array[Int]): Boolean = { - var i = 1 - val len = array.length - while (i < len) { - if (array(i) < array(i-1)) return false - i += 1 - } - true - } - /** * Applies transformation on a vector. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala index cb97742245689..1f5558dc2a50e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/BLAS.scala @@ -285,7 +285,7 @@ private[spark] object BLAS extends Serializable with Logging { * @param x the vector x that contains the n elements. * @param A the symmetric matrix A. Size of n x n. */ - def syr(alpha: Double, x: Vector, A: DenseMatrix) { + def syr(alpha: Double, x: Vector, A: DenseMatrix): Unit = { val mA = A.numRows val nA = A.numCols require(mA == nA, s"A is not a square matrix (and hence is not symmetric). 
A: $mA x $nA") @@ -299,7 +299,7 @@ private[spark] object BLAS extends Serializable with Logging { } } - private def syr(alpha: Double, x: DenseVector, A: DenseMatrix) { + private def syr(alpha: Double, x: DenseVector, A: DenseMatrix): Unit = { val nA = A.numRows val mA = A.numCols @@ -317,7 +317,7 @@ private[spark] object BLAS extends Serializable with Logging { } } - private def syr(alpha: Double, x: SparseVector, A: DenseMatrix) { + private def syr(alpha: Double, x: SparseVector, A: DenseMatrix): Unit = { val mA = A.numCols val xIndices = x.indices val xValues = x.values diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index e474cfa002fad..0304fd88dcd9f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -155,7 +155,7 @@ sealed trait Matrix extends Serializable { * and column indices respectively with the type `Int`, and the final parameter is the * corresponding value in the matrix with type `Double`. */ - private[spark] def foreachActive(f: (Int, Int, Double) => Unit) + private[spark] def foreachActive(f: (Int, Int, Double) => Unit): Unit /** * Find the number of non-zero active values. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index b754fad0c1796..83a519326df75 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -204,6 +204,14 @@ sealed trait Vector extends Serializable { */ @Since("2.0.0") def asML: newlinalg.Vector + + /** + * Calculate the dot product of this vector with another. + * + * If `size` does not match an [[IllegalArgumentException]] is thrown. + */ + @Since("3.0.0") + def dot(v: Vector): Double = BLAS.dot(this, v) } /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 0d223de9b6f7e..f3b984948e483 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -153,7 +153,7 @@ class CoordinateMatrix @Since("1.0.0") ( } /** Determines the size by computing the max row/column index. */ - private def computeSize() { + private def computeSize(): Unit = { // Reduce will throw an exception if `entries` is empty. val (m1, n1) = entries.map(entry => (entry.i, entry.j)).reduce { case ((i1, j1), (i2, j2)) => (math.max(i1, i2), math.max(j1, j2)) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 43f48befd014f..f25d86b30631a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -770,7 +770,7 @@ class RowMatrix @Since("1.0.0") ( } /** Updates or verifies the number of rows. 
*/ - private def updateNumRows(m: Long) { + private def updateNumRows(m: Long): Unit = { if (nRows <= 0) { nRows = m } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala index fa04f8eb5e796..d3b548832bb21 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala @@ -107,7 +107,7 @@ class PoissonGenerator @Since("1.1.0") ( override def nextValue(): Double = rng.sample() @Since("1.1.0") - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { rng.reseedRandomGenerator(seed) } @@ -132,7 +132,7 @@ class ExponentialGenerator @Since("1.3.0") ( override def nextValue(): Double = rng.sample() @Since("1.3.0") - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { rng.reseedRandomGenerator(seed) } @@ -159,7 +159,7 @@ class GammaGenerator @Since("1.3.0") ( override def nextValue(): Double = rng.sample() @Since("1.3.0") - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { rng.reseedRandomGenerator(seed) } @@ -187,7 +187,7 @@ class LogNormalGenerator @Since("1.3.0") ( override def nextValue(): Double = rng.sample() @Since("1.3.0") - override def setSeed(seed: Long) { + override def setSeed(seed: Long): Unit = { rng.reseedRandomGenerator(seed) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala index 12870f819b147..f3f15ba0d0f2c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/ALS.scala @@ -62,6 +62,13 @@ case class Rating @Since("0.8.0") ( * r > 0 and 0 if r <= 0. The ratings then act as 'confidence' values related to strength of * indicated user * preferences rather than explicit ratings given to items. + * + * Note: the input rating RDD to the ALS implementation should be deterministic. + * Nondeterministic data can cause failure during fitting ALS model. + * For example, an order-sensitive operation like sampling after a repartition makes RDD + * output nondeterministic, like `rdd.repartition(2).sample(false, 0.5, 1618)`. + * Checkpointing sampled RDD or adding a sort before sampling can help make the RDD + * deterministic. */ @Since("0.8.0") class ALS private ( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index ead9f5b300375..47bb1fa9127a6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression.impl.GLMRegressionModel import org.apache.spark.mllib.util.{Loader, Saveable} -import org.apache.spark.rdd.RDD /** * Regression model trained using Lasso. @@ -99,117 +98,7 @@ class LassoWithSGD private[mllib] ( .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) - /** - * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100, - * regParam: 0.01, miniBatchFraction: 1.0}. - */ - @Since("0.8.0") - @deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. 
Note the default " + - "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0") - def this() = this(1.0, 100, 0.01, 1.0) - override protected def createModel(weights: Vector, intercept: Double) = { new LassoModel(weights, intercept) } } - -/** - * Top-level methods for calling Lasso. - * - */ -@Since("0.8.0") -@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 1.0. Note the default " + - "regParam is 0.01 for LassoWithSGD, but is 0.0 for LinearRegression.", "2.0.0") -object LassoWithSGD { - - /** - * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used - * in gradient descent are initialized using the initial weights provided. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size scaling to be used for the iterations of gradient descent. - * @param regParam Regularization parameter. - * @param miniBatchFraction Fraction of data to be used per iteration. - * @param initialWeights Initial set of weights to be used. Array should be equal in size to - * the number of features in the data. - * - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double, - initialWeights: Vector): LassoModel = { - new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction) - .run(input, initialWeights) - } - - /** - * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param regParam Regularization parameter. - * @param miniBatchFraction Fraction of data to be used per iteration. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double): LassoModel = { - new LassoWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input) - } - - /** - * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. We use the entire data set to - * update the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param regParam Regularization parameter. - * @param numIterations Number of iterations of gradient descent to run. - * @return a LassoModel which has the weights and offset from training. 
- * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double): LassoModel = { - train(input, numIterations, stepSize, regParam, 1.0) - } - - /** - * Train a Lasso model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using a step size of 1.0. We use the entire data set to - * compute the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @return a LassoModel which has the weights and offset from training. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int): LassoModel = { - train(input, numIterations, 1.0, 0.01, 1.0) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index cb08216fbf690..f68ebc17e294d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -24,7 +24,6 @@ import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression.impl.GLMRegressionModel import org.apache.spark.mllib.util.{Loader, Saveable} -import org.apache.spark.rdd.RDD /** * Regression model trained using LinearRegression. @@ -100,109 +99,8 @@ class LinearRegressionWithSGD private[mllib] ( .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) - /** - * Construct a LinearRegression object with default parameters: {stepSize: 1.0, - * numIterations: 100, miniBatchFraction: 1.0}. - */ - @Since("0.8.0") - @deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") - def this() = this(1.0, 100, 0.0, 1.0) - override protected[mllib] def createModel(weights: Vector, intercept: Double) = { new LinearRegressionModel(weights, intercept) } } -/** - * Top-level methods for calling LinearRegression. - * - */ -@Since("0.8.0") -@deprecated("Use ml.regression.LinearRegression or LBFGS", "2.0.0") -object LinearRegressionWithSGD { - - /** - * Train a Linear Regression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used - * in gradient descent are initialized using the initial weights provided. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. - * @param initialWeights Initial set of weights to be used. Array should be equal in size to - * the number of features in the data. 
- * - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double, - initialWeights: Vector): LinearRegressionModel = { - new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction) - .run(input, initialWeights) - } - - /** - * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param miniBatchFraction Fraction of data to be used per iteration. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): LinearRegressionModel = { - new LinearRegressionWithSGD(stepSize, numIterations, 0.0, miniBatchFraction).run(input) - } - - /** - * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. We use the entire data set to - * compute the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param numIterations Number of iterations of gradient descent to run. - * @return a LinearRegressionModel which has the weights and offset from training. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double): LinearRegressionModel = { - train(input, numIterations, stepSize, 1.0) - } - - /** - * Train a LinearRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using a step size of 1.0. We use the entire data set to - * compute the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. Each pair describes a row of the data - * matrix A as well as the corresponding right hand side label y - * @param numIterations Number of iterations of gradient descent to run. - * @return a LinearRegressionModel which has the weights and offset from training. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int): LinearRegressionModel = { - train(input, numIterations, 1.0, 1.0) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 43c3154dd053b..1c3bdceab1d14 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -24,8 +24,6 @@ import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression.impl.GLMRegressionModel import org.apache.spark.mllib.util.{Loader, Saveable} -import org.apache.spark.rdd.RDD - /** * Regression model trained using RidgeRegression. 
@@ -100,113 +98,7 @@ class RidgeRegressionWithSGD private[mllib] ( .setRegParam(regParam) .setMiniBatchFraction(miniBatchFraction) - /** - * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100, - * regParam: 0.01, miniBatchFraction: 1.0}. - */ - @Since("0.8.0") - @deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " + - "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0") - def this() = this(1.0, 100, 0.01, 1.0) - override protected def createModel(weights: Vector, intercept: Double) = { new RidgeRegressionModel(weights, intercept) } } - -/** - * Top-level methods for calling RidgeRegression. - * - */ -@Since("0.8.0") -@deprecated("Use ml.regression.LinearRegression with elasticNetParam = 0.0. Note the default " + - "regParam is 0.01 for RidgeRegressionWithSGD, but is 0.0 for LinearRegression.", "2.0.0") -object RidgeRegressionWithSGD { - - /** - * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. The weights used - * in gradient descent are initialized using the initial weights provided. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param regParam Regularization parameter. - * @param miniBatchFraction Fraction of data to be used per iteration. - * @param initialWeights Initial set of weights to be used. Array should be equal in size to - * the number of features in the data. - * - */ - @Since("1.0.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double, - initialWeights: Vector): RidgeRegressionModel = { - new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run( - input, initialWeights) - } - - /** - * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. Each iteration uses - * `miniBatchFraction` fraction of the data to calculate a stochastic gradient. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @param stepSize Step size to be used for each iteration of gradient descent. - * @param regParam Regularization parameter. - * @param miniBatchFraction Fraction of data to be used per iteration. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double): RidgeRegressionModel = { - new RidgeRegressionWithSGD(stepSize, numIterations, regParam, miniBatchFraction).run(input) - } - - /** - * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using the specified step size. We use the entire data set to - * compute the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. - * @param stepSize Step size to be used for each iteration of Gradient Descent. - * @param regParam Regularization parameter. - * @param numIterations Number of iterations of gradient descent to run. 
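The deprecated LassoWithSGD, LinearRegressionWithSGD, and RidgeRegressionWithSGD training helpers removed in this patch all point callers to ml.regression.LinearRegression, with elasticNetParam = 1.0 for the Lasso case, 0.0 for the ridge case, and a regParam default of 0.0 rather than 0.01. A minimal migration sketch following those hints; the object name, toy data, and column names are illustrative only.

import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.sql.SparkSession

object WithSGDMigrationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("migration-sketch").getOrCreate()
    import spark.implicits._

    // Tiny stand-in training set; real code would load its own data.
    val training = Seq(
      (1.0, Vectors.dense(0.0, 1.1, 0.1)),
      (0.0, Vectors.dense(2.0, 1.0, -1.0)),
      (0.0, Vectors.dense(2.0, 1.3, 1.0)),
      (1.0, Vectors.dense(0.0, 1.2, -0.5))
    ).toDF("label", "features")

    // LassoWithSGD -> L1-regularized LinearRegression (elasticNetParam = 1.0).
    // The removed helper defaulted regParam to 0.01, so set it explicitly here.
    val lasso = new LinearRegression()
      .setElasticNetParam(1.0)
      .setRegParam(0.01)
      .setMaxIter(100)

    // RidgeRegressionWithSGD -> L2-regularized LinearRegression (elasticNetParam = 0.0).
    val ridge = new LinearRegression()
      .setElasticNetParam(0.0)
      .setRegParam(0.01)
      .setMaxIter(100)

    // LinearRegressionWithSGD -> plain LinearRegression with no regularization.
    val ols = new LinearRegression()
      .setRegParam(0.0)
      .setMaxIter(100)

    val lassoModel = lasso.fit(training)
    println(s"lasso coefficients: ${lassoModel.coefficients}")
    spark.stop()
  }
}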
- * @return a RidgeRegressionModel which has the weights and offset from training. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int, - stepSize: Double, - regParam: Double): RidgeRegressionModel = { - train(input, numIterations, stepSize, regParam, 1.0) - } - - /** - * Train a RidgeRegression model given an RDD of (label, features) pairs. We run a fixed number - * of iterations of gradient descent using a step size of 1.0. We use the entire data set to - * compute the true gradient in each iteration. - * - * @param input RDD of (label, array of features) pairs. - * @param numIterations Number of iterations of gradient descent to run. - * @return a RidgeRegressionModel which has the weights and offset from training. - * - */ - @Since("0.8.0") - def train( - input: RDD[LabeledPoint], - numIterations: Int): RidgeRegressionModel = { - train(input, numIterations, 1.0, 0.01, 1.0) - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala index 7f84be9f37822..b6eb10e9de00a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/KMeansDataGenerator.scala @@ -65,7 +65,7 @@ object KMeansDataGenerator { } @Since("0.8.0") - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 6) { // scalastyle:off println println("Usage: KMeansGenerator " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala index 58fd010e4905f..c218681b3375e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala @@ -189,7 +189,7 @@ object LinearDataGenerator { } @Since("0.8.0") - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { // scalastyle:off println println("Usage: LinearDataGenerator " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 68835bc79677f..7e9d9465441c9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -65,7 +65,7 @@ object LogisticRegressionDataGenerator { } @Since("0.8.0") - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length != 5) { // scalastyle:off println println("Usage: LogisticRegressionGenerator " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 42c5bcdd39f76..7a308a5ec25c0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -54,7 +54,7 @@ import org.apache.spark.rdd.RDD @Since("0.8.0") object MFDataGenerator { @Since("0.8.0") - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { // scalastyle:off println println("Usage: MFDataGenerator " + diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 6d15a6bb01e4e..9198334ba02a1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -173,7 +173,7 @@ object MLUtils extends Logging { * @see `org.apache.spark.mllib.util.MLUtils.loadLibSVMFile` */ @Since("1.0.0") - def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) { + def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String): Unit = { // TODO: allow to specify label precision and feature precision. val dataStr = data.map { case LabeledPoint(label, features) => val sb = new StringBuilder(label.toString) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala index c9468606544db..9f6ba025aedde 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/SVMDataGenerator.scala @@ -37,7 +37,7 @@ import org.apache.spark.rdd.RDD object SVMDataGenerator { @Since("0.8.0") - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { if (args.length < 2) { // scalastyle:off println println("Usage: SVMGenerator " + diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java index b7956b6fd3e9a..69952f0b64ac2 100644 --- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaDCTSuite.java @@ -20,7 +20,7 @@ import java.util.Arrays; import java.util.List; -import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D; +import org.jtransforms.dct.DoubleDCT_1D; import org.junit.Assert; import org.junit.Test; diff --git a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java index c04e2e69541ba..208a5aaa2bb15 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/classification/JavaLogisticRegressionSuite.java @@ -50,11 +50,8 @@ public void runLRUsingConstructor() { List validationData = LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17); - LogisticRegressionWithSGD lrImpl = new LogisticRegressionWithSGD(); + LogisticRegressionWithSGD lrImpl = new LogisticRegressionWithSGD(1.0, 100, 1.0, 1.0); lrImpl.setIntercept(true); - lrImpl.optimizer().setStepSize(1.0) - .setRegParam(1.0) - .setNumIterations(100); LogisticRegressionModel model = lrImpl.run(testRDD.rdd()); int numAccurate = validatePrediction(validationData, model); @@ -72,8 +69,8 @@ public void runLRUsingStaticMethods() { List validationData = LogisticRegressionSuite.generateLogisticInputAsList(A, B, nPoints, 17); - LogisticRegressionModel model = LogisticRegressionWithSGD.train( - testRDD.rdd(), 100, 1.0, 1.0); + LogisticRegressionModel model = new LogisticRegressionWithSGD(1.0, 100, 0.01, 1.0) + .run(testRDD.rdd()); int numAccurate = validatePrediction(validationData, model); Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java index 270e636f82117..a9a8b7f2b88d6 100644 --- 
a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaKMeansSuite.java @@ -42,11 +42,11 @@ public void runKMeansUsingStaticMethods() { Vector expectedCenter = Vectors.dense(1.0, 3.0, 4.0); JavaRDD data = jsc.parallelize(points, 2); - KMeansModel model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.K_MEANS_PARALLEL()); + KMeansModel model = KMeans.train(data.rdd(), 1, 1, KMeans.K_MEANS_PARALLEL()); assertEquals(1, model.clusterCenters().length); assertEquals(expectedCenter, model.clusterCenters()[0]); - model = KMeans.train(data.rdd(), 1, 1, 1, KMeans.RANDOM()); + model = KMeans.train(data.rdd(), 1, 1, KMeans.RANDOM()); assertEquals(expectedCenter, model.clusterCenters()[0]); } diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java index 1458cc72bc17f..35ad24bc2a84f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLassoSuite.java @@ -51,10 +51,7 @@ public void runLassoUsingConstructor() { List validationData = LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); - LassoWithSGD lassoSGDImpl = new LassoWithSGD(); - lassoSGDImpl.optimizer().setStepSize(1.0) - .setRegParam(0.01) - .setNumIterations(20); + LassoWithSGD lassoSGDImpl = new LassoWithSGD(1.0, 20, 0.01, 1.0); LassoModel model = lassoSGDImpl.run(testRDD.rdd()); int numAccurate = validatePrediction(validationData, model); @@ -72,7 +69,7 @@ public void runLassoUsingStaticMethods() { List validationData = LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); - LassoModel model = LassoWithSGD.train(testRDD.rdd(), 100, 1.0, 0.01, 1.0); + LassoModel model = new LassoWithSGD(1.0, 100, 0.01, 1.0).run(testRDD.rdd()); int numAccurate = validatePrediction(validationData, model); Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java index 86c723aa00746..7e87588c4f0f6 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaLinearRegressionSuite.java @@ -33,7 +33,7 @@ private static int validatePrediction( List validationData, LinearRegressionModel model) { int numAccurate = 0; for (LabeledPoint point : validationData) { - Double prediction = model.predict(point.features()); + double prediction = model.predict(point.features()); // A prediction is off if the prediction is more than 0.5 away from expected value. 
if (Math.abs(prediction - point.label()) <= 0.5) { numAccurate++; @@ -53,7 +53,7 @@ public void runLinearRegressionUsingConstructor() { List validationData = LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); - LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(); + LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0); linSGDImpl.setIntercept(true); LinearRegressionModel model = linSGDImpl.run(testRDD.rdd()); @@ -72,7 +72,8 @@ public void runLinearRegressionUsingStaticMethods() { List validationData = LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 17, 0.1); - LinearRegressionModel model = LinearRegressionWithSGD.train(testRDD.rdd(), 100); + LinearRegressionModel model = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0) + .run(testRDD.rdd()); int numAccurate = validatePrediction(validationData, model); Assert.assertTrue(numAccurate > nPoints * 4.0 / 5.0); @@ -85,7 +86,7 @@ public void testPredictJavaRDD() { double[] weights = {10, 10}; JavaRDD testRDD = jsc.parallelize( LinearDataGenerator.generateLinearInputAsList(A, weights, nPoints, 42, 0.1), 2).cache(); - LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(); + LinearRegressionWithSGD linSGDImpl = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0); LinearRegressionModel model = linSGDImpl.run(testRDD.rdd()); JavaRDD vectors = testRDD.map(LabeledPoint::features); JavaRDD predictions = model.predict(vectors); diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java index 5a9389c424b44..63441950cd18f 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -34,7 +34,7 @@ private static double predictionError(List validationData, RidgeRegressionModel model) { double errorSum = 0; for (LabeledPoint point : validationData) { - Double prediction = model.predict(point.features()); + double prediction = model.predict(point.features()); errorSum += (prediction - point.label()) * (prediction - point.label()); } return errorSum / validationData.size(); @@ -60,11 +60,7 @@ public void runRidgeRegressionUsingConstructor() { new ArrayList<>(data.subList(0, numExamples))); List validationData = data.subList(numExamples, 2 * numExamples); - RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD(); - ridgeSGDImpl.optimizer() - .setStepSize(1.0) - .setRegParam(0.0) - .setNumIterations(200); + RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD(1.0, 200, 0.0, 1.0); RidgeRegressionModel model = ridgeSGDImpl.run(testRDD.rdd()); double unRegularizedErr = predictionError(validationData, model); @@ -85,10 +81,12 @@ public void runRidgeRegressionUsingStaticMethods() { new ArrayList<>(data.subList(0, numExamples))); List validationData = data.subList(numExamples, 2 * numExamples); - RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0); + RidgeRegressionModel model = new RidgeRegressionWithSGD(1.0, 200, 0.0, 1.0) + .run(testRDD.rdd()); double unRegularizedErr = predictionError(validationData, model); - model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.1); + model = new RidgeRegressionWithSGD(1.0, 200, 0.1, 1.0) + .run(testRDD.rdd()); double regularizedErr = predictionError(validationData, model); Assert.assertTrue(regularizedErr < 
unRegularizedErr); diff --git a/mllib/src/test/scala/org/apache/spark/ml/MLEventsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/MLEventsSuite.scala index e2ee7c05ab399..f2343b7a88560 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/MLEventsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/MLEventsSuite.scala @@ -25,7 +25,7 @@ import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.when import org.scalatest.BeforeAndAfterEach import org.scalatest.concurrent.Eventually -import org.scalatest.mockito.MockitoSugar.mock +import org.scalatestplus.mockito.MockitoSugar.mock import org.apache.spark.SparkFunSuite import org.apache.spark.ml.param.ParamMap diff --git a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala index 1183cb0617610..e6025a5a53ca6 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/PipelineSuite.scala @@ -22,7 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.mockito.ArgumentMatchers.{any, eq => meq} import org.mockito.Mockito.when -import org.scalatest.mockito.MockitoSugar.mock +import org.scalatestplus.mockito.MockitoSugar.mock import org.apache.spark.SparkFunSuite import org.apache.spark.ml.Pipeline.SharedReadWrite diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala index 9f2053dcc91fc..3ebf8a83a892c 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala @@ -44,7 +44,7 @@ class DecisionTreeClassifierSuite extends MLTest with DefaultReadWriteTest { private val seed = 42 - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() categoricalDataPointsRDD = sc.parallelize(OldDecisionTreeSuite.generateCategoricalDataPoints()).map(_.asML) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala index 467f13f808a01..af3dd201d3b51 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala @@ -55,7 +55,7 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest { private val eps: Double = 1e-5 private val absEps: Double = 1e-8 - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() data = sc.parallelize(EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100), 2) .map(_.asML) diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala index 0f0954e5d8cac..f03ed0b76eb80 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala @@ -42,7 +42,7 @@ class RandomForestClassifierSuite extends MLTest with DefaultReadWriteTest { private var orderedLabeledPoints50_1000: RDD[LabeledPoint] = _ private var orderedLabeledPoints5_20: RDD[LabeledPoint] = _ - override def beforeAll() { + override def 
beforeAll(): Unit = { super.beforeAll() orderedLabeledPoints50_1000 = sc.parallelize(EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000)) @@ -56,7 +56,7 @@ class RandomForestClassifierSuite extends MLTest with DefaultReadWriteTest { // Tests calling train() ///////////////////////////////////////////////////////////////////////////// - def binaryClassificationTestWithContinuousFeatures(rf: RandomForestClassifier) { + def binaryClassificationTestWithContinuousFeatures(rf: RandomForestClassifier): Unit = { val categoricalFeatures = Map.empty[Int, Int] val numClasses = 2 val newRF = rf diff --git a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala index c1a156959618e..f4f858c3e92dc 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/evaluation/RegressionEvaluatorSuite.scala @@ -76,6 +76,10 @@ class RegressionEvaluatorSuite // mae evaluator.setMetricName("mae") assert(evaluator.evaluate(predictions) ~== 0.08399089 absTol 0.01) + + // var + evaluator.setMetricName("var") + assert(evaluator.evaluate(predictions) ~== 63.6944519 absTol 0.01) } test("read/write") { diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala index 05d4a6ee2dabf..91bec50fb904f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/BinarizerSuite.scala @@ -101,6 +101,20 @@ class BinarizerSuite extends MLTest with DefaultReadWriteTest { } } + test("Binarizer should support sparse vector with negative threshold") { + val data = Seq( + (Vectors.sparse(3, Array(1), Array(0.5)), Vectors.dense(Array(1.0, 1.0, 1.0))), + (Vectors.dense(Array(0.0, 0.5, 0.0)), Vectors.dense(Array(1.0, 1.0, 1.0)))) + val df = data.toDF("feature", "expected") + val binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(-0.5) + binarizer.transform(df).select("binarized_feature", "expected").collect().foreach { + case Row(x: Vector, y: Vector) => + assert(x == y, "The feature value is not correct after binarization.") + } + } test("read/write") { val t = new Binarizer() diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala index 985e396000d05..079dabb3665be 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/DCTSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.ml.feature -import edu.emory.mathcs.jtransforms.dct.DoubleDCT_1D +import org.jtransforms.dct.DoubleDCT_1D import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala index ae086d32d6d0b..6f6ab26cbac43 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/QuantileDiscretizerSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.ml.feature import org.apache.spark.ml.Pipeline +import org.apache.spark.ml.param.ParamsSuite import 
org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest} import org.apache.spark.sql._ @@ -423,33 +424,92 @@ class QuantileDiscretizerSuite extends MLTest with DefaultReadWriteTest { assert(readDiscretizer.hasDefault(readDiscretizer.outputCol)) } - test("Multiple Columns: Both inputCol and inputCols are set") { + test("Multiple Columns: Mismatched sizes of inputCols/outputCols") { val spark = this.spark import spark.implicits._ val discretizer = new QuantileDiscretizer() - .setInputCol("input") - .setOutputCol("result") + .setInputCols(Array("input")) + .setOutputCols(Array("result1", "result2")) .setNumBuckets(3) - .setInputCols(Array("input1", "input2")) val df = sc.parallelize(Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)) .map(Tuple1.apply).toDF("input") - // When both inputCol and inputCols are set, we throw Exception. intercept[IllegalArgumentException] { discretizer.fit(df) } } - test("Multiple Columns: Mismatched sizes of inputCols / outputCols") { + test("Multiple Columns: Mismatched sizes of inputCols/numBucketsArray") { val spark = this.spark import spark.implicits._ val discretizer = new QuantileDiscretizer() - .setInputCols(Array("input")) + .setInputCols(Array("input1", "input2")) .setOutputCols(Array("result1", "result2")) - .setNumBuckets(3) + .setNumBucketsArray(Array(2, 5, 10)) + val data1 = Array(1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0) + val data2 = Array(1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0) + val df = data1.zip(data2).toSeq.toDF("input1", "input2") + intercept[IllegalArgumentException] { + discretizer.fit(df) + } + } + + test("Multiple Columns: Set both of numBuckets/numBucketsArray") { + val spark = this.spark + import spark.implicits._ + val discretizer = new QuantileDiscretizer() + .setInputCols(Array("input1", "input2")) + .setOutputCols(Array("result1", "result2")) + .setNumBucketsArray(Array(2, 5)) + .setNumBuckets(2) + val data1 = Array(1.0, 3.0, 2.0, 1.0, 1.0, 2.0, 3.0, 2.0, 2.0, 2.0) + val data2 = Array(1.0, 2.0, 3.0, 1.0, 1.0, 1.0, 1.0, 3.0, 2.0, 3.0) + val df = data1.zip(data2).toSeq.toDF("input1", "input2") + intercept[IllegalArgumentException] { + discretizer.fit(df) + } + } + + test("Setting numBucketsArray for Single-Column QuantileDiscretizer") { + val spark = this.spark + import spark.implicits._ + val discretizer = new QuantileDiscretizer() + .setInputCol("input") + .setOutputCol("result") + .setNumBucketsArray(Array(2, 5)) val df = sc.parallelize(Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)) .map(Tuple1.apply).toDF("input") intercept[IllegalArgumentException] { discretizer.fit(df) } } + + test("Assert exception is thrown if both multi-column and single-column params are set") { + val spark = this.spark + import spark.implicits._ + val df = Seq((0.5, 0.3), (0.5, -0.4)).toDF("feature1", "feature2") + ParamsSuite.testExclusiveParams(new QuantileDiscretizer, df, ("inputCol", "feature1"), + ("inputCols", Array("feature1", "feature2"))) + ParamsSuite.testExclusiveParams(new QuantileDiscretizer, df, ("inputCol", "feature1"), + ("outputCol", "result1"), ("outputCols", Array("result1", "result2"))) + // this should fail because at least one of inputCol and inputCols must be set + ParamsSuite.testExclusiveParams(new QuantileDiscretizer, df, ("outputCol", "feature1")) + } + + test("Setting inputCol without setting outputCol") { + val spark = this.spark + import spark.implicits._ + + val df = sc.parallelize(Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)) + .map(Tuple1.apply).toDF("input") + val numBuckets = 2 + val discretizer = new QuantileDiscretizer() + 
.setInputCol("input") + .setNumBuckets(numBuckets) + val model = discretizer.fit(df) + val result = model.transform(df) + + val observedNumBuckets = result.select(discretizer.getOutputCol).distinct.count + assert(observedNumBuckets === numBuckets, + "Observed number of buckets does not equal expected number of buckets.") + } } diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaParserSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaParserSuite.scala index add1cc17ea057..efd56f7073a19 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaParserSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/RFormulaParserSuite.scala @@ -25,7 +25,7 @@ class RFormulaParserSuite extends SparkFunSuite { formula: String, label: String, terms: Seq[String], - schema: StructType = new StructType) { + schema: StructType = new StructType): Unit = { val resolved = RFormulaParser.parse(formula).resolve(schema) assert(resolved.label == label) val simpleTerms = terms.map { t => diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala index 630e785e59507..49ebcb385640e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala @@ -40,7 +40,7 @@ class DecisionTreeRegressorSuite extends MLTest with DefaultReadWriteTest { private val seed = 42 - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() categoricalDataPointsRDD = sc.parallelize(OldDecisionTreeSuite.generateCategoricalDataPoints().map(_.asML)) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala index 884fe2d11bf5a..60007975c3b52 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala @@ -47,7 +47,7 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest { private var trainData: RDD[LabeledPoint] = _ private var validationData: RDD[LabeledPoint] = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() data = sc.parallelize(EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 10, 100), 2) .map(_.asML) diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala index c6dabd1b28829..0243e8d2335ee 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala @@ -38,7 +38,7 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ private var orderedLabeledPoints50_1000: RDD[LabeledPoint] = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() orderedLabeledPoints50_1000 = sc.parallelize(EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) @@ -49,7 +49,7 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{ // Tests calling train() ///////////////////////////////////////////////////////////////////////////// - def regressionTestWithContinuousFeatures(rf: RandomForestRegressor) { + def 
regressionTestWithContinuousFeatures(rf: RandomForestRegressor): Unit = { val categoricalFeaturesInfo = Map.empty[Int, Int] val newRF = rf .setImpurity("variance") diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index a63ab913f2c22..ae5e979983b4f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -485,7 +485,8 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext { } } - def binaryClassificationTestWithContinuousFeaturesAndSubsampledFeatures(strategy: OldStrategy) { + def binaryClassificationTestWithContinuousFeaturesAndSubsampledFeatures( + strategy: OldStrategy): Unit = { val numFeatures = 50 val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures, 1000) val rdd = sc.parallelize(arr).map(_.asML.toInstance) diff --git a/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala b/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala index 8a0a48ff6095b..90079c9848823 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/util/MLTest.scala @@ -56,7 +56,7 @@ trait MLTest extends StreamTest with TempDirectory { self: Suite => sc.setCheckpointDir(checkpointDir) } - override def afterAll() { + override def afterAll(): Unit = { try { Utils.deleteRecursively(new File(checkpointDir)) } finally { @@ -127,7 +127,7 @@ trait MLTest extends StreamTest with TempDirectory { self: Suite => dataframe: DataFrame, transformer: Transformer, expectedMessagePart : String, - firstResultCol: String) { + firstResultCol: String): Unit = { withClue(s"""Expected message part "${expectedMessagePart}" is not found in DF test.""") { val exceptionOnDf = intercept[Throwable] { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala index 5cf4377768516..d4e9da3c6263e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala @@ -206,7 +206,7 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w def validatePrediction( predictions: Seq[Double], input: Seq[LabeledPoint], - expectedAcc: Double = 0.83) { + expectedAcc: Double = 0.83): Unit = { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label } @@ -224,12 +224,8 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w val testRDD = sc.parallelize(testData, 2) testRDD.cache() - val lr = new LogisticRegressionWithSGD().setIntercept(true) - lr.optimizer - .setStepSize(10.0) - .setRegParam(0.0) - .setNumIterations(20) - .setConvergenceTol(0.0005) + val lr = new LogisticRegressionWithSGD(10.0, 20, 0.0, 1.0).setIntercept(true) + lr.optimizer.setConvergenceTol(0.0005) val model = lr.run(testRDD) @@ -300,11 +296,7 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w testRDD.cache() // Use half as many iterations as the previous test. 
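In these test updates the pattern of a no-arg constructor followed by a chain of optimizer setters is collapsed into the four-argument constructors. Going by the removed default constructor `def this() = this(1.0, 100, 0.01, 1.0)` and its documented defaults {stepSize: 1.0, numIterations: 100, regParam: 0.01, miniBatchFraction: 1.0}, the positional order appears to be (stepSize, numIterations, regParam, miniBatchFraction). A small sketch spelling that out; note these constructors are private[mllib], so only test and internal code under the mllib packages can call them.

// Assumed argument order, inferred from the removed `def this() = this(1.0, 100, 0.01, 1.0)`
// and its documented defaults {stepSize, numIterations, regParam, miniBatchFraction}.
val lr = new LogisticRegressionWithSGD(
    10.0, // stepSize
    20,   // numIterations
    0.0,  // regParam
    1.0   // miniBatchFraction
  ).setIntercept(true)
lr.optimizer.setConvergenceTol(0.0005) // remaining optimizer knobs are still set via `optimizer`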
- val lr = new LogisticRegressionWithSGD().setIntercept(true) - lr.optimizer - .setStepSize(10.0) - .setRegParam(0.0) - .setNumIterations(10) + val lr = new LogisticRegressionWithSGD(10.0, 10, 0.0, 1.0).setIntercept(true) val model = lr.run(testRDD, initialWeights) @@ -335,11 +327,7 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w testRDD.cache() // Use half as many iterations as the previous test. - val lr = new LogisticRegressionWithSGD().setIntercept(true) - lr.optimizer. - setStepSize(1.0). - setNumIterations(10). - setRegParam(1.0) + val lr = new LogisticRegressionWithSGD(1.0, 10, 1.0, 1.0).setIntercept(true) val model = lr.run(testRDD, initialWeights) @@ -916,7 +904,7 @@ class LogisticRegressionClusterSuite extends SparkFunSuite with LocalClusterSpar }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. - val model = LogisticRegressionWithSGD.train(points, 2) + val model = new LogisticRegressionWithSGD(1.0, 2, 0.0, 1.0).run(points) val predictions = model.predict(points.map(_.features)) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala index 725389813b3e2..47dac3ec29a5c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/NaiveBayesSuite.scala @@ -91,7 +91,7 @@ class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext { import NaiveBayes.{Multinomial, Bernoulli} - def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]): Unit = { val numOfPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala index 3676d9c5debc8..007b8ae6e1a6a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/SVMSuite.scala @@ -62,7 +62,7 @@ object SVMSuite { class SVMSuite extends SparkFunSuite with MLlibTestSparkContext { - def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]): Unit = { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala index 5f797a60f09e6..7349e0319324a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/StreamingLogisticRegressionSuite.scala @@ -23,23 +23,17 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} +import org.apache.spark.streaming.{LocalStreamingContext, TestSuiteBase} import 
org.apache.spark.streaming.dstream.DStream -class StreamingLogisticRegressionSuite extends SparkFunSuite with TestSuiteBase { +class StreamingLogisticRegressionSuite + extends SparkFunSuite + with LocalStreamingContext + with TestSuiteBase { // use longer wait time to ensure job completion override def maxWaitTimeMillis: Int = 30000 - var ssc: StreamingContext = _ - - override def afterFunction() { - super.afterFunction() - if (ssc != null) { - ssc.stop() - } - } - // Test if we can accurately learn B for Y = logistic(BX) on streaming data test("parameter accuracy") { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala index c4bf5b27187f6..149a525a58ff6 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/KMeansSuite.scala @@ -367,7 +367,7 @@ class KMeansClusterSuite extends SparkFunSuite with LocalClusterSparkContext { for (initMode <- Seq(KMeans.RANDOM, KMeans.K_MEANS_PARALLEL)) { // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. - val model = KMeans.train(points, 2, 2, 1, initMode) + val model = KMeans.train(points, 2, 2, initMode) val predictions = model.predict(points).collect() val cost = model.computeCost(points) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala index a1ac10c06c697..415ac87275390 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala @@ -20,23 +20,14 @@ package org.apache.spark.mllib.clustering import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.TestingUtils._ -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} +import org.apache.spark.streaming.{LocalStreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.random.XORShiftRandom -class StreamingKMeansSuite extends SparkFunSuite with TestSuiteBase { +class StreamingKMeansSuite extends SparkFunSuite with LocalStreamingContext with TestSuiteBase { override def maxWaitTimeMillis: Int = 30000 - var ssc: StreamingContext = _ - - override def afterFunction() { - super.afterFunction() - if (ssc != null) { - ssc.stop() - } - } - test("accuracy for single center and equivalence to grand average") { // set parameters val numBatches = 10 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index b4520d42fedf5..184c89c9eaaf9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -24,7 +24,7 @@ import scala.reflect.ClassTag import breeze.linalg.{CSCMatrix, Matrix => BM} import org.mockito.Mockito.when -import org.scalatest.mockito.MockitoSugar._ +import org.scalatestplus.mockito.MockitoSugar._ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.config.Kryo._ @@ -39,7 +39,7 @@ class MatricesSuite extends SparkFunSuite { val ser = new KryoSerializer(conf).newInstance() - def check[T: 
ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index fee0b02bf8ed8..c0c5c5c7d98d5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -42,7 +42,7 @@ class VectorsSuite extends SparkFunSuite with Logging { conf.set(KRYO_REGISTRATION_REQUIRED, true) val ser = new KryoSerializer(conf).newInstance() - def check[T: ClassTag](t: T) { + def check[T: ClassTag](t: T): Unit = { assert(ser.deserialize[T](ser.serialize(t)) === t) } @@ -510,4 +510,27 @@ class VectorsSuite extends SparkFunSuite with Logging { Vectors.sparse(-1, Array((1, 2.0))) } } + + test("dot product only supports vectors of same size") { + val vSize4 = Vectors.dense(arr) + val vSize1 = Vectors.zeros(1) + intercept[IllegalArgumentException]{ vSize1.dot(vSize4) } + } + + test("dense vector dot product") { + val dv = Vectors.dense(arr) + assert(dv.dot(dv) === 0.26) + } + + test("sparse vector dot product") { + val sv = Vectors.sparse(n, indices, values) + assert(sv.dot(sv) === 0.26) + } + + test("mixed sparse and dense vector dot product") { + val sv = Vectors.sparse(n, indices, values) + val dv = Vectors.dense(arr) + assert(sv.dot(dv) === 0.26) + assert(dv.dot(sv) === 0.26) + } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala index f6a996940291c..9d7177e0a149e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala @@ -35,7 +35,7 @@ class BlockMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val numPartitions = 3 var gridBasedMat: BlockMatrix = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val blocks: Seq[((Int, Int), Matrix)] = Seq( ((0, 0), new DenseMatrix(2, 2, Array(1.0, 0.0, 0.0, 2.0))), diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala index 37d75103d18d2..d197f06a393e8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrixSuite.scala @@ -29,7 +29,7 @@ class CoordinateMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { val n = 4 var mat: CoordinateMatrix = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val entries = sc.parallelize(Seq( (0, 0, 1.0), diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala index cca4eb4e4260e..e961d10711860 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrixSuite.scala @@ -36,7 +36,7 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { ).map(x => IndexedRow(x._1, x._2)) var indexedRows: RDD[IndexedRow] = _ - override def 
beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() indexedRows = sc.parallelize(data, 2) } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala index a0c4c68243e67..0a4b11935580a 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/RowMatrixSuite.scala @@ -57,7 +57,7 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { var denseMat: RowMatrix = _ var sparseMat: RowMatrix = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() denseMat = new RowMatrix(sc.parallelize(denseData, 2)) sparseMat = new RowMatrix(sc.parallelize(sparseData, 2)) @@ -213,7 +213,7 @@ class RowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext { brzNorm(v, 1.0) < 1e-6 } - def assertColumnEqualUpToSign(A: BDM[Double], B: BDM[Double], k: Int) { + def assertColumnEqualUpToSign(A: BDM[Double], B: BDM[Double], k: Int): Unit = { assert(A.rows === B.rows) for (j <- 0 until k) { val aj = A(::, j) @@ -338,7 +338,7 @@ class RowMatrixClusterSuite extends SparkFunSuite with LocalClusterSparkContext var mat: RowMatrix = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val m = 4 val n = 200000 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala index b3bf5a2a8f2cc..a629c6951abcd 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomDataGeneratorSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.util.StatCounter class RandomDataGeneratorSuite extends SparkFunSuite { - def apiChecks(gen: RandomDataGenerator[Double]) { + def apiChecks(gen: RandomDataGenerator[Double]): Unit = { // resetting seed should generate the same sequence of random numbers gen.setSeed(42L) val array1 = (0 until 1000).map(_ => gen.nextValue()) @@ -56,7 +56,7 @@ class RandomDataGeneratorSuite extends SparkFunSuite { def distributionChecks(gen: RandomDataGenerator[Double], mean: Double = 0.0, stddev: Double = 1.0, - epsilon: Double = 0.01) { + epsilon: Double = 0.01): Unit = { for (seed <- 0 until 5) { gen.setSeed(seed.toLong) val sample = (0 until 100000).map { _ => gen.nextValue()} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala index 9b4dc29d326a1..470e1016dab39 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/random/RandomRDDsSuite.scala @@ -38,7 +38,7 @@ class RandomRDDsSuite extends SparkFunSuite with MLlibTestSparkContext with Seri expectedNumPartitions: Int, expectedMean: Double, expectedStddev: Double, - epsilon: Double = 0.01) { + epsilon: Double = 0.01): Unit = { val stats = rdd.stats() assert(expectedSize === stats.count) assert(expectedNumPartitions === rdd.partitions.size) @@ -53,7 +53,7 @@ class RandomRDDsSuite extends SparkFunSuite with MLlibTestSparkContext with Seri expectedNumPartitions: Int, expectedMean: Double, expectedStddev: Double, - epsilon: Double = 0.01) { + epsilon: Double = 0.01): Unit = { assert(expectedNumPartitions === rdd.partitions.size) val values = 
new ArrayBuffer[Double]() rdd.collect.foreach { vector => { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala index b08ad99f4f204..9be87db873dad 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/recommendation/ALSSuite.scala @@ -224,7 +224,7 @@ class ALSSuite extends SparkFunSuite with MLlibTestSparkContext { negativeWeights: Boolean = false, numUserBlocks: Int = -1, numProductBlocks: Int = -1, - negativeFactors: Boolean = true) { + negativeFactors: Boolean = true): Unit = { // scalastyle:on val (sampledRatings, trueRatings, truePrefs) = ALSSuite.generateRatings(users, products, diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala index d96103d01e4ab..f336dac0ccb5d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LassoSuite.scala @@ -33,7 +33,7 @@ private object LassoSuite { class LassoSuite extends SparkFunSuite with MLlibTestSparkContext { - def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]): Unit = { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => // A prediction is off if the prediction is more than 0.5 away from expected value. math.abs(prediction - expected.label) > 0.5 @@ -55,8 +55,7 @@ class LassoSuite extends SparkFunSuite with MLlibTestSparkContext { } val testRDD = sc.parallelize(testData, 2).cache() - val ls = new LassoWithSGD() - ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(40) + val ls = new LassoWithSGD(1.0, 40, 0.01, 1.0) val model = ls.run(testRDD) val weight0 = model.weights(0) @@ -99,8 +98,8 @@ class LassoSuite extends SparkFunSuite with MLlibTestSparkContext { val testRDD = sc.parallelize(testData, 2).cache() - val ls = new LassoWithSGD() - ls.optimizer.setStepSize(1.0).setRegParam(0.01).setNumIterations(40).setConvergenceTol(0.0005) + val ls = new LassoWithSGD(1.0, 40, 0.01, 1.0) + ls.optimizer.setConvergenceTol(0.0005) val model = ls.run(testRDD, initialWeights) val weight0 = model.weights(0) @@ -153,7 +152,7 @@ class LassoClusterSuite extends SparkFunSuite with LocalClusterSparkContext { }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. 
- val model = LassoWithSGD.train(points, 2) + val model = new LassoWithSGD(1.0, 2, 0.01, 1.0).run(points) val predictions = model.predict(points.map(_.features)) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala index 0694079b9df9e..be0834d0fd7df 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LinearRegressionSuite.scala @@ -33,7 +33,7 @@ private object LinearRegressionSuite { class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { - def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]): Unit = { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => // A prediction is off if the prediction is more than 0.5 away from expected value. math.abs(prediction - expected.label) > 0.5 @@ -46,7 +46,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { test("linear regression") { val testRDD = sc.parallelize(LinearDataGenerator.generateLinearInput( 3.0, Array(10.0, 10.0), 100, 42), 2).cache() - val linReg = new LinearRegressionWithSGD().setIntercept(true) + val linReg = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0).setIntercept(true) linReg.optimizer.setNumIterations(1000).setStepSize(1.0) val model = linReg.run(testRDD) @@ -72,7 +72,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { test("linear regression without intercept") { val testRDD = sc.parallelize(LinearDataGenerator.generateLinearInput( 0.0, Array(10.0, 10.0), 100, 42), 2).cache() - val linReg = new LinearRegressionWithSGD().setIntercept(false) + val linReg = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0).setIntercept(false) linReg.optimizer.setNumIterations(1000).setStepSize(1.0) val model = linReg.run(testRDD) @@ -103,7 +103,7 @@ class LinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { val sv = Vectors.sparse(10000, Seq((0, v(0)), (9999, v(1)))) LabeledPoint(label, sv) }.cache() - val linReg = new LinearRegressionWithSGD().setIntercept(false) + val linReg = new LinearRegressionWithSGD(1.0, 100, 0.0, 1.0).setIntercept(false) linReg.optimizer.setNumIterations(1000).setStepSize(1.0) val model = linReg.run(sparseRDD) @@ -160,7 +160,7 @@ class LinearRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkC }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. - val model = LinearRegressionWithSGD.train(points, 2) + val model = new LinearRegressionWithSGD(1.0, 2, 0.0, 1.0).run(points) val predictions = model.predict(points.map(_.features)) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala index 815be32d2e510..2d6aec184ad9d 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/RidgeRegressionSuite.scala @@ -60,18 +60,13 @@ class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { val validationRDD = sc.parallelize(validationData, 2).cache() // First run without regularization. 
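The regression suites also stop going through the parameterless *WithSGD constructors and the train(...) companion helpers, and pass the optimizer settings positionally instead. Judging from the setter chains they replace, the four arguments are step size, number of iterations, regularization parameter and mini-batch fraction. The sketch below restates that pattern with illustrative data; note the constructors are package-private to org.apache.spark.mllib, so this only compiles from inside that package tree, as these suites are:

import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LassoWithSGD}

object LassoConstructorSketch {
  // Sketch only: assumes a SparkContext provided by the test harness.
  def fitLasso(sc: SparkContext): Unit = {
    val points = sc.parallelize(Seq(
      LabeledPoint(1.0, Vectors.dense(1.0, 0.0)),
      LabeledPoint(0.0, Vectors.dense(0.0, 1.0))), numSlices = 2).cache()

    // stepSize = 1.0, numIterations = 40, regParam = 0.01, miniBatchFraction = 1.0
    val lasso = new LassoWithSGD(1.0, 40, 0.01, 1.0)
    // Anything beyond the four constructor arguments still goes through the optimizer.
    lasso.optimizer.setConvergenceTol(0.0005)
    val model = lasso.run(points)
    println(model.weights)
  }
}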
- val linearReg = new LinearRegressionWithSGD() - linearReg.optimizer.setNumIterations(200) - .setStepSize(1.0) + val linearReg = new LinearRegressionWithSGD(1.0, 200, 0.0, 1.0) val linearModel = linearReg.run(testRDD) val linearErr = predictionError( linearModel.predict(validationRDD.map(_.features)).collect(), validationData) - val ridgeReg = new RidgeRegressionWithSGD() - ridgeReg.optimizer.setNumIterations(200) - .setRegParam(0.1) - .setStepSize(1.0) + val ridgeReg = new RidgeRegressionWithSGD(1.0, 200, 0.1, 1.0) val ridgeModel = ridgeReg.run(testRDD) val ridgeErr = predictionError( ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) @@ -110,7 +105,7 @@ class RidgeRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkCo }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. - val model = RidgeRegressionWithSGD.train(points, 2) + val model = new RidgeRegressionWithSGD(1.0, 2, 0.01, 1.0).run(points) val predictions = model.predict(points.map(_.features)) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala index eaeaa3fc1e68d..8e2d7d10f2ce2 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionSuite.scala @@ -22,31 +22,25 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LinearDataGenerator -import org.apache.spark.streaming.{StreamingContext, TestSuiteBase} +import org.apache.spark.streaming.{LocalStreamingContext, TestSuiteBase} import org.apache.spark.streaming.dstream.DStream -class StreamingLinearRegressionSuite extends SparkFunSuite with TestSuiteBase { +class StreamingLinearRegressionSuite + extends SparkFunSuite + with LocalStreamingContext + with TestSuiteBase { // use longer wait time to ensure job completion override def maxWaitTimeMillis: Int = 20000 - var ssc: StreamingContext = _ - - override def afterFunction() { - super.afterFunction() - if (ssc != null) { - ssc.stop() - } - } - // Assert that two values are equal within tolerance epsilon - def assertEqual(v1: Double, v2: Double, epsilon: Double) { + def assertEqual(v1: Double, v2: Double, epsilon: Double): Unit = { def errorMessage = v1.toString + " did not equal " + v2.toString assert(math.abs(v1-v2) <= epsilon, errorMessage) } // Assert that model predictions are correct - def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]) { + def validatePrediction(predictions: Seq[Double], input: Seq[LabeledPoint]): Unit = { val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => // A prediction is off if the prediction is more than 0.5 away from expected value. 
math.abs(prediction - expected.label) > 0.5 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala index 88b9d4c039ba9..b738236473230 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala @@ -437,7 +437,7 @@ object DecisionTreeSuite extends SparkFunSuite { def validateClassifier( model: DecisionTreeModel, input: Seq[LabeledPoint], - requiredAccuracy: Double) { + requiredAccuracy: Double): Unit = { val predictions = input.map(x => model.predict(x.features)) val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label @@ -450,7 +450,7 @@ object DecisionTreeSuite extends SparkFunSuite { def validateRegressor( model: DecisionTreeModel, input: Seq[LabeledPoint], - requiredMSE: Double) { + requiredMSE: Double): Unit = { val predictions = input.map(x => model.predict(x.features)) val squaredError = predictions.zip(input).map { case (prediction, expected) => val err = prediction - expected.label diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/EnsembleTestHelper.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/EnsembleTestHelper.scala index d43e62bb65535..e04d7b7c327a8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/EnsembleTestHelper.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/EnsembleTestHelper.scala @@ -37,7 +37,7 @@ object EnsembleTestHelper { numCols: Int, expectedMean: Double, expectedStddev: Double, - epsilon: Double) { + epsilon: Double): Unit = { val values = new mutable.ArrayBuffer[Double]() data.foreach { row => assert(row.size == numCols) @@ -51,7 +51,7 @@ object EnsembleTestHelper { def validateClassifier( model: TreeEnsembleModel, input: Seq[LabeledPoint], - requiredAccuracy: Double) { + requiredAccuracy: Double): Unit = { val predictions = input.map(x => model.predict(x.features)) val numOffPredictions = predictions.zip(input).count { case (prediction, expected) => prediction != expected.label @@ -68,7 +68,7 @@ object EnsembleTestHelper { model: TreeEnsembleModel, input: Seq[LabeledPoint], required: Double, - metricName: String = "mse") { + metricName: String = "mse"): Unit = { val predictions = input.map(x => model.predict(x.features)) val errors = predictions.zip(input).map { case (prediction, point) => point.label - prediction diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala index bec61ba6a003c..b1a385a576cea 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/RandomForestSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.Utils * Test suite for [[RandomForest]]. 
*/ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext { - def binaryClassificationTestWithContinuousFeatures(strategy: Strategy) { + def binaryClassificationTestWithContinuousFeatures(strategy: Strategy): Unit = { val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) val rdd = sc.parallelize(arr) val numTrees = 1 @@ -68,7 +68,7 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext { binaryClassificationTestWithContinuousFeatures(strategy) } - def regressionTestWithContinuousFeatures(strategy: Strategy) { + def regressionTestWithContinuousFeatures(strategy: Strategy): Unit = { val arr = EnsembleTestHelper.generateOrderedLabeledPoints(numFeatures = 50, 1000) val rdd = sc.parallelize(arr) val numTrees = 1 diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala index 2853b752cb85c..79d4785fd6fa7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/LocalClusterSparkContext.scala @@ -25,7 +25,7 @@ import org.apache.spark.internal.config.Network.RPC_MESSAGE_MAX_SIZE trait LocalClusterSparkContext extends BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() val conf = new SparkConf() .setMaster("local-cluster[2, 1, 1024]") @@ -34,7 +34,7 @@ trait LocalClusterSparkContext extends BeforeAndAfterAll { self: Suite => sc = new SparkContext(conf) } - override def afterAll() { + override def afterAll(): Unit = { try { if (sc != null) { sc.stop() diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala index 720237bd2dddd..f9a3cd088314e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLlibTestSparkContext.scala @@ -31,7 +31,7 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => @transient var sc: SparkContext = _ @transient var checkpointDir: String = _ - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder .master("local[2]") @@ -43,7 +43,7 @@ trait MLlibTestSparkContext extends TempDirectory { self: Suite => sc.setCheckpointDir(checkpointDir) } - override def afterAll() { + override def afterAll(): Unit = { try { Utils.deleteRecursively(new File(checkpointDir)) SparkSession.clearActiveSession() diff --git a/pom.xml b/pom.xml index 6c474f5f7a3e7..9c2aa9de85ce6 100644 --- a/pom.xml +++ b/pom.xml @@ -125,7 +125,7 @@ 2.7.4 2.5.0 ${hadoop.version} - 3.4.6 + 3.4.14 2.7.1 0.4.2 org.spark-project.hive @@ -139,7 +139,7 @@ 2.3.0 10.12.1.1 1.10.1 - 1.5.5 + 1.5.6 nohive com.twitter 1.6.0 @@ -164,14 +164,14 @@ 3.4.1 3.2.2 - 2.12.8 + 2.12.10 2.12 --diff --test true 1.9.13 - 2.9.9 - 2.9.9.3 + 2.9.10 + 2.9.10 1.1.7.3 1.1.2 1.10 @@ -240,7 +240,7 @@ --> ${session.executionRootDirectory} - 512m + 1g @@ -620,7 +620,7 @@ com.github.luben zstd-jni - 1.4.2-1 + 1.4.3-1 com.clearspring.analytics @@ -786,14 +786,8 @@ org.scalanlp breeze_${scala.binary.version} - 0.13.2 + 1.0 - - - junit - junit - org.apache.commons commons-math3 @@ -839,7 +833,7 @@ org.scala-lang.modules scala-parser-combinators_${scala.binary.version} - 1.1.0 + 1.1.2 jline @@ -849,7 +843,7 @@ 
org.scalatest scalatest_${scala.binary.version} - 3.0.5 + 3.0.8 test @@ -867,7 +861,7 @@ org.scalacheck scalacheck_${scala.binary.version} - 1.13.5 + 1.14.2 test @@ -1343,6 +1337,10 @@ io.netty netty + + com.github.spotbugs + spotbugs-annotations + @@ -2002,75 +2000,6 @@ - - ${hive.group} - hive-contrib - ${hive.version} - test - - - ${hive.group} - hive-exec - - - ${hive.group} - hive-serde - - - ${hive.group} - hive-shims - - - commons-codec - commons-codec - - - org.slf4j - slf4j-api - - - - - ${hive.group}.hcatalog - hive-hcatalog-core - ${hive.version} - test - - - ${hive.group} - hive-exec - - - ${hive.group} - hive-metastore - - - ${hive.group} - hive-cli - - - ${hive.group} - hive-common - - - com.google.guava - guava - - - org.slf4j - slf4j-api - - - org.codehaus.jackson - jackson-mapper-asl - - - org.apache.hadoop - * - - - - org.apache.orc orc-core @@ -2287,6 +2216,17 @@ + + enforce-no-duplicate-dependencies + + enforce + + + + + + + @@ -2974,7 +2914,6 @@ 3.2.0 2.13.0 - 3.4.13 org.apache.hive core ${hive23.version} @@ -3053,6 +2992,19 @@ scala-2.12 + + + scala-2.13 + + + + org.scala-lang.modules + scala-parallel-collections_${scala.binary.version} + 0.2.0 + + + + - - ${hive.group} - hive-contrib - - - ${hive.group}.hcatalog - hive-hcatalog-core - org.eclipse.jetty jetty-server diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala index 36d4ac095e10c..9517a599be633 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala @@ -72,7 +72,7 @@ object HiveThriftServer2 extends Logging { server } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { // If the arguments contains "-h" or "--help", print out the usage and exit. if (args.contains("-h") || args.contains("--help")) { HiveServer2.main(args) @@ -303,7 +303,7 @@ private[hive] class HiveThriftServer2(sqlContext: SQLContext) // started, and then once only. 
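Immediately below, the thrift server guards its startup with an AtomicBoolean field named started ("started, and then once only"). The snippet that follows is a generic sketch of that start-at-most-once idiom, not the server's actual logic:

import java.util.concurrent.atomic.AtomicBoolean

// Generic start-at-most-once guard: only the first caller flips
// false -> true and runs the start body; later calls are no-ops.
class StartOnce(body: () => Unit) {
  private val started = new AtomicBoolean(false)

  def start(): Unit = {
    if (started.compareAndSet(false, true)) {
      body()
    }
  }
}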
private val started = new AtomicBoolean(false) - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { val sparkSqlCliService = new SparkSQLCLIService(this, sqlContext) setSuperField(this, "cliService", sparkSqlCliService) addService(sparkSqlCliService) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala index 599294dfbb7d7..a4024be67ac9c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ReflectionUtils.scala @@ -18,11 +18,11 @@ package org.apache.spark.sql.hive.thriftserver private[hive] object ReflectionUtils { - def setSuperField(obj : Object, fieldName: String, fieldValue: Object) { + def setSuperField(obj : Object, fieldName: String, fieldValue: Object): Unit = { setAncestorField(obj, 1, fieldName, fieldValue) } - def setAncestorField(obj: AnyRef, level: Int, fieldName: String, fieldValue: AnyRef) { + def setAncestorField(obj: AnyRef, level: Int, fieldName: String, fieldValue: AnyRef): Unit = { val ancestor = Iterator.iterate[Class[_]](obj.getClass)(_.getSuperclass).drop(level).next() val field = ancestor.getDeclaredField(fieldName) field.setAccessible(true) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index 69e85484ccf8e..9ca6c39d016ba 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.HiveResult import org.apache.spark.sql.execution.command.SetCommand import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.{Utils => SparkUtils} private[hive] class SparkExecuteStatementOperation( @@ -77,7 +78,7 @@ private[hive] class SparkExecuteStatementOperation( HiveThriftServer2.listener.onOperationClosed(statementId) } - def addNonNullColumnValue(from: SparkRow, to: ArrayBuffer[Any], ordinal: Int) { + def addNonNullColumnValue(from: SparkRow, to: ArrayBuffer[Any], ordinal: Int): Unit = { dataTypes(ordinal) match { case StringType => to += from.getString(ordinal) @@ -103,6 +104,8 @@ private[hive] class SparkExecuteStatementOperation( to += from.getAs[Timestamp](ordinal) case BinaryType => to += from.getAs[Array[Byte]](ordinal) + case CalendarIntervalType => + to += HiveResult.toHiveString((from.getAs[CalendarInterval](ordinal), CalendarIntervalType)) case _: ArrayType | _: StructType | _: MapType | _: UserDefinedType[_] => val hiveString = HiveResult.toHiveString((from.get(ordinal), dataTypes(ordinal))) to += hiveString @@ -264,6 +267,13 @@ private[hive] class SparkExecuteStatementOperation( // Actually do need to catch Throwable as some failures don't inherit from Exception and // HiveServer will silently swallow them. case e: Throwable => + // When cancel() or close() is called very quickly after the query is started, + // then they may both call cleanup() before Spark Jobs are started. 
But before background + // task interrupted, it may have start some spark job, so we need to cancel again to + // make sure job was cancelled when background thread was interrupted + if (statementId != null) { + sqlContext.sparkContext.cancelJobGroup(statementId) + } val currentState = getStatus().getState() if (currentState.isTerminal) { // This may happen if the execution was cancelled, and then closed from another thread. @@ -300,7 +310,7 @@ private[hive] class SparkExecuteStatementOperation( } } - private def cleanup(state: OperationState) { + private def cleanup(state: OperationState): Unit = { setState(state) if (runInBackground) { val backgroundHandle = getBackgroundHandle() @@ -331,7 +341,11 @@ private[hive] class SparkExecuteStatementOperation( object SparkExecuteStatementOperation { def getTableSchema(structType: StructType): TableSchema = { val schema = structType.map { field => - val attrTypeString = if (field.dataType == NullType) "void" else field.dataType.catalogString + val attrTypeString = field.dataType match { + case NullType => "void" + case CalendarIntervalType => StringType.catalogString + case other => other.catalogString + } new FieldSchema(field.name, attrTypeString, field.getComment.getOrElse("")) } new TableSchema(schema.asJava) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala new file mode 100644 index 0000000000000..7a6a8c59b7216 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.thriftserver + +import java.util.UUID + +import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveOperationType +import org.apache.hive.service.cli.{HiveSQLException, OperationState} +import org.apache.hive.service.cli.operation.GetTypeInfoOperation +import org.apache.hive.service.cli.session.HiveSession + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SQLContext +import org.apache.spark.util.{Utils => SparkUtils} + +/** + * Spark's own GetTypeInfoOperation + * + * @param sqlContext SQLContext to use + * @param parentSession a HiveSession from SessionManager + */ +private[hive] class SparkGetTypeInfoOperation( + sqlContext: SQLContext, + parentSession: HiveSession) + extends GetTypeInfoOperation(parentSession) with Logging { + + private var statementId: String = _ + + override def close(): Unit = { + super.close() + HiveThriftServer2.listener.onOperationClosed(statementId) + } + + override def runInternal(): Unit = { + statementId = UUID.randomUUID().toString + val logMsg = "Listing type info" + logInfo(s"$logMsg with $statementId") + setState(OperationState.RUNNING) + // Always use the latest class loader provided by executionHive's state. + val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader + Thread.currentThread().setContextClassLoader(executionHiveClassLoader) + + if (isAuthV2Enabled) { + authorizeMetaGets(HiveOperationType.GET_TYPEINFO, null) + } + + HiveThriftServer2.listener.onStatementStart( + statementId, + parentSession.getSessionHandle.getSessionId.toString, + logMsg, + statementId, + parentSession.getUsername) + + try { + ThriftserverShimUtils.supportedType().foreach(typeInfo => { + val rowData = Array[AnyRef]( + typeInfo.getName, // TYPE_NAME + typeInfo.toJavaSQLType.asInstanceOf[AnyRef], // DATA_TYPE + typeInfo.getMaxPrecision.asInstanceOf[AnyRef], // PRECISION + typeInfo.getLiteralPrefix, // LITERAL_PREFIX + typeInfo.getLiteralSuffix, // LITERAL_SUFFIX + typeInfo.getCreateParams, // CREATE_PARAMS + typeInfo.getNullable.asInstanceOf[AnyRef], // NULLABLE + typeInfo.isCaseSensitive.asInstanceOf[AnyRef], // CASE_SENSITIVE + typeInfo.getSearchable.asInstanceOf[AnyRef], // SEARCHABLE + typeInfo.isUnsignedAttribute.asInstanceOf[AnyRef], // UNSIGNED_ATTRIBUTE + typeInfo.isFixedPrecScale.asInstanceOf[AnyRef], // FIXED_PREC_SCALE + typeInfo.isAutoIncrement.asInstanceOf[AnyRef], // AUTO_INCREMENT + typeInfo.getLocalizedName, // LOCAL_TYPE_NAME + typeInfo.getMinimumScale.asInstanceOf[AnyRef], // MINIMUM_SCALE + typeInfo.getMaximumScale.asInstanceOf[AnyRef], // MAXIMUM_SCALE + null, // SQL_DATA_TYPE, unused + null, // SQL_DATETIME_SUB, unused + typeInfo.getNumPrecRadix // NUM_PREC_RADIX + ) + rowSet.addRow(rowData) + }) + setState(OperationState.FINISHED) + } catch { + case e: HiveSQLException => + setState(OperationState.ERROR) + HiveThriftServer2.listener.onStatementError( + statementId, e.getMessage, SparkUtils.exceptionString(e)) + throw e + } + HiveThriftServer2.listener.onStatementFinish(statementId) + } +} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index b9614d49eadbd..e3efa2d3ae8c9 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -63,7 +63,7 @@ 
private[hive] object SparkSQLCLIDriver extends Logging { * a signal handler will invoke this registered callback if a Ctrl+C signal is detected while * a command is being processed by the current thread. */ - def installSignalHandler() { + def installSignalHandler(): Unit = { HiveInterruptUtils.add(() => { // Handle remote execution mode if (SparkSQLEnv.sparkContext != null) { @@ -77,7 +77,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { }) } - def main(args: Array[String]) { + def main(args: Array[String]): Unit = { val oproc = new OptionsProcessor() if (!oproc.process_stage1(args)) { System.exit(1) @@ -111,6 +111,11 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Set all properties specified via command line. val conf: HiveConf = sessionState.getConf + // Hive 2.0.0 onwards HiveConf.getClassLoader returns the UDFClassLoader (created by Hive). + // Because of this spark cannot find the jars as class loader got changed + // Hive changed the class loader because of HIVE-11878, so it is required to use old + // classLoader as sparks loaded all the jars in this classLoader + conf.setClassLoader(Thread.currentThread().getContextClassLoader) sessionState.cmdProperties.entrySet().asScala.foreach { item => val key = item.getKey.toString val value = item.getValue.toString @@ -133,20 +138,7 @@ private[hive] object SparkSQLCLIDriver extends Logging { // Clean up after we exit ShutdownHookManager.addShutdownHook { () => SparkSQLEnv.stop() } - val remoteMode = isRemoteMode(sessionState) - // "-h" option has been passed, so connect to Hive thrift server. - if (!remoteMode) { - // Hadoop-20 and above - we need to augment classpath using hiveconf - // components. - // See also: code in ExecDriver.java - var loader = conf.getClassLoader - val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) - if (StringUtils.isNotBlank(auxJars)) { - loader = ThriftserverShimUtils.addToClassPath(loader, StringUtils.split(auxJars, ",")) - } - conf.setClassLoader(loader) - Thread.currentThread().setContextClassLoader(loader) - } else { + if (isRemoteMode(sessionState)) { // Hive 1.2 + not supported in CLI throw new RuntimeException("Remote operations not supported") } @@ -164,6 +156,22 @@ private[hive] object SparkSQLCLIDriver extends Logging { val cli = new SparkSQLCLIDriver cli.setHiveVariables(oproc.getHiveVariables) + // In SparkSQL CLI, we may want to use jars augmented by hiveconf + // hive.aux.jars.path, here we add jars augmented by hiveconf to + // Spark's SessionResourceLoader to obtain these jars. + val auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS) + if (StringUtils.isNotBlank(auxJars)) { + val resourceLoader = SparkSQLEnv.sqlContext.sessionState.resourceLoader + StringUtils.split(auxJars, ",").foreach(resourceLoader.addJar(_)) + } + + // The class loader of CliSessionState's conf is current main thread's class loader + // used to load jars passed by --jars. One class loader used by AddJarCommand is + // sharedState.jarClassLoader which contain jar path passed by --jars in main thread. + // We set CliSessionState's conf class loader to sharedState.jarClassLoader. + // Thus we can load all jars passed by --jars and AddJarCommand. + sessionState.getConf.setClassLoader(SparkSQLEnv.sqlContext.sharedState.jarClassLoader) + // TODO work around for set the log output to console, because the HiveContext // will set the output into an invalid buffer. 
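To summarise the CLI changes above: instead of splicing hive.aux.jars.path onto a Hive class loader, the driver now registers those jars with Spark's own session resource loader and relies on the shared jar class loader, so jars from --jars, ADD JAR and hive.aux.jars.path all resolve the same way. A rough sketch of that hand-off is below; in the patch itself the shared loader is installed on the CliSessionState configuration rather than the thread, and these accessors are package-private to org.apache.spark.sql, so this is only callable from code inside that package tree, as the CLI driver is:

import org.apache.spark.sql.SQLContext

object AuxJarSketch {
  def registerAuxJars(sqlContext: SQLContext, auxJars: String): Unit = {
    // hive.aux.jars.path is a comma-separated list of jar paths; hand each
    // one to the session resource loader so Spark can serve it to executors.
    auxJars.split(",").map(_.trim).filter(_.nonEmpty).foreach { jar =>
      sqlContext.sessionState.resourceLoader.addJar(jar)
    }
    // Use the loader that already holds --jars and ADD JAR entries, so
    // Hive classes loaded afterwards can see the same jars.
    Thread.currentThread()
      .setContextClassLoader(sqlContext.sharedState.jarClassLoader)
  }
}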
sessionState.in = System.in diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index c32d908ad1bba..1644ecb2453be 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -43,7 +43,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC extends CLIService(hiveServer) with ReflectedCompositeService { - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) @@ -105,7 +105,7 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => - def initCompositeService(hiveConf: HiveConf) { + def initCompositeService(hiveConf: HiveConf): Unit = { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala index 960fdd11db15d..362ac362e9718 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala @@ -94,7 +94,7 @@ private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlCont override def getSchema: Schema = tableSchema - override def destroy() { + override def destroy(): Unit = { super.destroy() hiveResponse = null tableSchema = null diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala index 674da18ca1803..2fda9d0a4f60f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLEnv.scala @@ -33,7 +33,7 @@ private[hive] object SparkSQLEnv extends Logging { var sqlContext: SQLContext = _ var sparkContext: SparkContext = _ - def init() { + def init(): Unit = { if (sqlContext == null) { val sparkConf = new SparkConf(loadDefaults = true) // If user doesn't specify the appName, we want to get [SparkSQL::localHostName] instead of @@ -60,7 +60,7 @@ private[hive] object SparkSQLEnv extends Logging { } /** Cleans up and shuts down the Spark SQL environments. 
*/ - def stop() { + def stop(): Unit = { logDebug("Shutting down Spark SQL Environment") // Stop the SparkContext if (SparkSQLEnv.sparkContext != null) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 13055e0ae1394..c4248bfde38cc 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -38,7 +38,7 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: private lazy val sparkSqlOperationManager = new SparkSQLOperationManager() - override def init(hiveConf: HiveConf) { + override def init(hiveConf: HiveConf): Unit = { setSuperField(this, "operationManager", sparkSqlOperationManager) super.init(hiveConf) } @@ -63,6 +63,9 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sqlContext.newSession() } ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion) + val hiveSessionState = session.getSessionState + setConfMap(ctx, hiveSessionState.getOverriddenConfigurations) + setConfMap(ctx, hiveSessionState.getHiveVariables) if (sessionConf != null && sessionConf.containsKey("use:database")) { ctx.sql(s"use ${sessionConf.get("use:database")}") } @@ -70,10 +73,18 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sessionHandle } - override def closeSession(sessionHandle: SessionHandle) { + override def closeSession(sessionHandle: SessionHandle): Unit = { HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString) super.closeSession(sessionHandle) sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle) sparkSqlOperationManager.sessionToContexts.remove(sessionHandle) } + + def setConfMap(conf: SQLContext, confMap: java.util.Map[String, String]): Unit = { + val iterator = confMap.entrySet().iterator() + while (iterator.hasNext) { + val kv = iterator.next() + conf.setConf(kv.getKey, kv.getValue) + } + } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index 35f92547e7815..3396560f43502 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -28,7 +28,6 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.hive.thriftserver._ -import org.apache.spark.sql.internal.SQLConf /** * Executes queries using Spark SQL, and maintains a list of handles to active queries. 
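The session-manager change above also moves where per-connection settings land: openSession now copies the connection's overridden Hive configurations and Hive variables into that session's SQLContext, instead of leaving it to the operation manager. The practical effect, which the updated HiveThriftServer2Suites test further below exercises, is that values supplied on the JDBC URL (the hiveconf section after '?') or changed later with SET are visible to ${...} variable substitution. A hedged usage sketch over plain JDBC; the host, port and key name are illustrative, and the Hive JDBC driver must be on the classpath:

import java.sql.DriverManager

object SessionConfSketch {
  def main(args: Array[String]): Unit = {
    // '?myKey=initial' sets a hiveconf entry for this connection only.
    val url = "jdbc:hive2://localhost:10000/default?myKey=initial"
    val conn = DriverManager.getConnection(url, System.getProperty("user.name"), "")
    try {
      val stmt = conn.createStatement()

      // The session-level value is visible to ${...} substitution.
      val rs1 = stmt.executeQuery("SELECT '${myKey}'")
      rs1.next()
      assert(rs1.getString(1) == "initial")

      // SET overrides it for the rest of the session.
      stmt.execute("SET myKey=updated")
      val rs2 = stmt.executeQuery("SELECT '${myKey}'")
      rs2.next()
      assert(rs2.getString(1) == "updated")
    } finally {
      conn.close()
    }
  }
}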
@@ -51,9 +50,6 @@ private[thriftserver] class SparkSQLOperationManager() require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + s" initialized or had already closed.") val conf = sqlContext.sessionState.conf - val hiveSessionState = parentSession.getSessionState - setConfMap(conf, hiveSessionState.getOverriddenConfigurations) - setConfMap(conf, hiveSessionState.getHiveVariables) val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC) val operation = new SparkExecuteStatementOperation(parentSession, statement, confOverlay, runInBackground)(sqlContext, sessionToActivePool) @@ -145,11 +141,14 @@ private[thriftserver] class SparkSQLOperationManager() operation } - def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = { - val iterator = confMap.entrySet().iterator() - while (iterator.hasNext) { - val kv = iterator.next() - conf.setConfString(kv.getKey, kv.getValue) - } + override def newGetTypeInfoOperation( + parentSession: HiveSession): GetTypeInfoOperation = synchronized { + val sqlContext = sessionToContexts.get(parentSession.getSessionHandle) + require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" + + " initialized or had already closed.") + val operation = new SparkGetTypeInfoOperation(sqlContext, parentSession) + handleToOperation.put(operation.getHandle, operation) + logDebug(s"Created GetTypeInfoOperation with session=$parentSession.") + operation } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 261e8fc912eb9..4056be4769d21 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -26,6 +26,7 @@ import org.apache.commons.text.StringEscapeUtils import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.{ExecutionInfo, ExecutionState, SessionInfo} +import org.apache.spark.sql.hive.thriftserver.ui.ToolTips._ import org.apache.spark.ui._ import org.apache.spark.ui.UIUtils._ @@ -72,6 +73,10 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" val table = if (numStatement > 0) { val headerRow = Seq("User", "JobID", "GroupID", "Start Time", "Finish Time", "Close Time", "Execution Time", "Duration", "Statement", "State", "Detail") + val tooltips = Seq(None, None, None, None, Some(THRIFT_SERVER_FINISH_TIME), + Some(THRIFT_SERVER_CLOSE_TIME), Some(THRIFT_SERVER_EXECUTION), + Some(THRIFT_SERVER_DURATION), None, None, None) + assert(headerRow.length == tooltips.length) val dataRows = listener.getExecutionList.sortBy(_.startTimestamp).reverse def generateDataRow(info: ExecutionInfo): Seq[Node] = { @@ -91,8 +96,10 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" {formatDate(info.startTimestamp)} {if (info.finishTimestamp > 0) formatDate(info.finishTimestamp)} {if (info.closeTimestamp > 0) formatDate(info.closeTimestamp)} - {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} - {formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} + + {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} + + {formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} {info.statement} 
{info.state} {errorMessageCell(detail)} @@ -100,7 +107,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" } Some(UIUtils.listingTable(headerRow, generateDataRow, - dataRows, false, None, Seq(null), false)) + dataRows, false, None, Seq(null), false, tooltipHeaders = tooltips)) } else { None } @@ -157,7 +164,8 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends WebUIPage("" {session.sessionId} {formatDate(session.startTimestamp)} {if (session.finishTimestamp > 0) formatDate(session.finishTimestamp)} - {formatDurationOption(Some(session.totalTime))} + + {formatDurationOption(Some(session.totalTime))} {session.totalExecution.toString} } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala index 81df1304085e8..0aa0a2b8335d8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerSessionPage.scala @@ -26,6 +26,7 @@ import org.apache.commons.text.StringEscapeUtils import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.{ExecutionInfo, ExecutionState} +import org.apache.spark.sql.hive.thriftserver.ui.ToolTips._ import org.apache.spark.ui._ import org.apache.spark.ui.UIUtils._ @@ -81,6 +82,10 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) val table = if (numStatement > 0) { val headerRow = Seq("User", "JobID", "GroupID", "Start Time", "Finish Time", "Close Time", "Execution Time", "Duration", "Statement", "State", "Detail") + val tooltips = Seq(None, None, None, None, Some(THRIFT_SERVER_FINISH_TIME), + Some(THRIFT_SERVER_CLOSE_TIME), Some(THRIFT_SERVER_EXECUTION), + Some(THRIFT_SERVER_DURATION), None, None, None) + assert(headerRow.length == tooltips.length) val dataRows = executionList.sortBy(_.startTimestamp).reverse def generateDataRow(info: ExecutionInfo): Seq[Node] = { @@ -98,10 +103,14 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) {info.groupId} {formatDate(info.startTimestamp)} - {formatDate(info.finishTimestamp)} - {formatDate(info.closeTimestamp)} - {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} - {formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} + {if (info.finishTimestamp > 0) formatDate(info.finishTimestamp)} + {if (info.closeTimestamp > 0) formatDate(info.closeTimestamp)} + + {formatDurationOption(Some(info.totalTime(info.finishTimestamp)))} + + + {formatDurationOption(Some(info.totalTime(info.closeTimestamp)))} + {info.statement} {info.state} {errorMessageCell(detail)} @@ -109,7 +118,7 @@ private[ui] class ThriftServerSessionPage(parent: ThriftServerTab) } Some(UIUtils.listingTable(headerRow, generateDataRow, - dataRows, false, None, Seq(null), false)) + dataRows, false, None, Seq(null), false, tooltipHeaders = tooltips)) } else { None } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala index db2066009b351..8efb2c3311cfe 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala +++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerTab.scala @@ -39,7 +39,7 @@ private[thriftserver] class ThriftServerTab(sparkContext: SparkContext) attachPage(new ThriftServerSessionPage(this)) parent.attachTab(this) - def detach() { + def detach(): Unit = { getSparkUI(sparkContext).detachTab(this) } } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala new file mode 100644 index 0000000000000..1990b8f2d3285 --- /dev/null +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ToolTips.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.thriftserver.ui + +private[ui] object ToolTips { + val THRIFT_SERVER_FINISH_TIME = + "Execution finish time, before fetching the results" + + val THRIFT_SERVER_CLOSE_TIME = + "Operation close time after fetching the results" + + val THRIFT_SERVER_EXECUTION = + "Difference between start time and finish time" + + val THRIFT_SERVER_DURATION = + "Difference between start time and close time" +} diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6e042ac41d9da..f3063675a79f7 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -27,12 +27,11 @@ import scala.concurrent.Promise import scala.concurrent.duration._ import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging -import org.apache.spark.sql.hive.test.HiveTestUtils +import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -202,7 +201,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { } test("Commands using SerDe provided in --jars") { - val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath + val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath val dataFilePath = Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") @@ -218,8 +217,8 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { -> "", "INSERT INTO TABLE t1 SELECT key, val FROM sourceTable;" -> "", - "SELECT count(key) FROM t1;" 
- -> "5", + "SELECT collect_list(array(val)) FROM t1;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", "DROP TABLE t1;" -> "", "DROP TABLE sourceTable;" @@ -227,6 +226,32 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { ) } + test("SPARK-29022: Commands using SerDe provided in --hive.aux.jars.path") { + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath + runCliWithin( + 3.minute, + Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + """CREATE TABLE addJarWithHiveAux(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithHiveAux (key INT, val STRING);" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithHiveAux;" + -> "", + "INSERT INTO TABLE addJarWithHiveAux SELECT key, val FROM sourceTableForWithHiveAux;" + -> "", + "SELECT collect_list(array(val)) FROM addJarWithHiveAux;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithHiveAux;" + -> "", + "DROP TABLE sourceTableForWithHiveAux;" + -> "" + ) + } + test("SPARK-11188 Analysis error reporting") { runCliWithin(timeout = 2.minute, errorResponses = Seq("AnalysisException"))( @@ -297,12 +322,66 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { } test("Support hive.aux.jars.path") { - val hiveContribJar = HiveTestUtils.getHiveContribJar.getCanonicalPath + val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath runCliWithin( 1.minute, Seq("--conf", s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( - s"CREATE TEMPORARY FUNCTION example_max AS '${classOf[UDAFExampleMax].getName}';" -> "", - "SELECT example_max(1);" -> "1" + "CREATE TEMPORARY FUNCTION example_format AS " + + "'org.apache.hadoop.hive.contrib.udf.example.UDFExampleFormat';" -> "", + "SELECT example_format('%o', 93);" -> "135" + ) + } + + test("SPARK-28840 test --jars command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840" + ) + } + + test("SPARK-28840 test --jars and hive.aux.jars.path command") { + val jarFile = new File("../../sql/hive/src/test/resources/SPARK-21101-1.0.jar").getCanonicalPath + val hiveContribJar = HiveTestJars.getHiveContribJar().getCanonicalPath + runCliWithin( + 1.minute, + Seq("--jars", s"$jarFile", "--conf", + s"spark.hadoop.${ConfVars.HIVEAUXJARS}=$hiveContribJar"))( + "CREATE TEMPORARY FUNCTION testjar AS" + + " 'org.apache.spark.sql.hive.execution.UDTFStack';" -> "", + "SELECT testjar(1,'TEST-SPARK-TEST-jar', 28840);" -> "TEST-SPARK-TEST-jar\t28840", + "CREATE TEMPORARY FUNCTION example_max AS " + + "'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';" -> "", + "SELECT concat_ws(',', 'First', example_max(1234321), 'Third');" -> "First,1234321,Third" + ) + } + + test("SPARK-29022 Commands using SerDe provided in ADD JAR sql") { + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + val hiveContribJar = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath + 
runCliWithin( + 3.minute)( + s"ADD JAR ${hiveContribJar};" -> "", + """CREATE TABLE addJarWithSQL(key string, val string) + |ROW FORMAT SERDE 'org.apache.hive.hcatalog.data.JsonSerDe'; + """.stripMargin + -> "", + "CREATE TABLE sourceTableForWithSQL(key INT, val STRING);" + -> "", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE sourceTableForWithSQL;" + -> "", + "INSERT INTO TABLE addJarWithSQL SELECT key, val FROM sourceTableForWithSQL;" + -> "", + "SELECT collect_list(array(val)) FROM addJarWithSQL;" + -> """[["val_238"],["val_86"],["val_311"],["val_27"],["val_165"]]""", + "DROP TABLE addJarWithSQL;" + -> "", + "DROP TABLE sourceTableForWithSQL;" + -> "" ) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index b7185db2f2ae7..8a5526ea780ef 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -43,7 +43,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.HiveUtils -import org.apache.spark.sql.hive.test.HiveTestUtils +import org.apache.spark.sql.hive.test.HiveTestJars import org.apache.spark.sql.internal.StaticSQLConf.HIVE_THRIFT_SERVER_SINGLESESSION import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -144,10 +144,17 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { def executeTest(hiveList: String): Unit = { hiveList.split(";").foreach{ m => val kv = m.split("=") - // select "${a}"; ---> avalue - val resultSet = statement.executeQuery("select \"${" + kv(0) + "}\"") + val k = kv(0) + val v = kv(1) + val modValue = s"${v}_MOD_VALUE" + // select '${a}'; ---> avalue + val resultSet = statement.executeQuery(s"select '$${$k}'") resultSet.next() - assert(resultSet.getString(1) === kv(1)) + assert(resultSet.getString(1) === v) + statement.executeQuery(s"set $k=$modValue") + val modResultSet = statement.executeQuery(s"select '$${$k}'") + modResultSet.next() + assert(modResultSet.getString(1) === s"$modValue") } } } @@ -485,7 +492,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { withMultipleConnectionJdbcStatement("smallKV", "addJar")( { statement => - val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath + val jarFile = HiveTestJars.getHiveHcatalogCoreJar().getCanonicalPath statement.executeQuery(s"ADD JAR $jarFile") }, @@ -662,6 +669,21 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { assert(rs.getBigDecimal(1) === new java.math.BigDecimal("1.000000000000000000")) } } + + test("Support interval type") { + withJdbcStatement() { statement => + val rs = statement.executeQuery("SELECT interval 3 months 1 hours") + assert(rs.next()) + assert(rs.getString(1) === "interval 3 months 1 hours") + } + // Invalid interval value + withJdbcStatement() { statement => + val e = intercept[SQLException] { + statement.executeQuery("SELECT interval 3 months 1 hou") + } + assert(e.getMessage.contains("org.apache.spark.sql.catalyst.parser.ParseException")) + } + } } class SingleSessionSuite extends HiveThriftJdbcTest { @@ -820,7 +842,7 @@ abstract class HiveThriftJdbcTest extends 
HiveThriftServer2Test { s"jdbc:hive2://localhost:$serverPort/?${hiveConfList}#${hiveVarList}" } - def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*) { + def withMultipleConnectionJdbcStatement(tableNames: String*)(fs: (Statement => Unit)*): Unit = { val user = System.getProperty("user.name") val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } val statements = connections.map(_.createStatement()) @@ -841,7 +863,7 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { } } - def withDatabase(dbNames: String*)(fs: (Statement => Unit)*) { + def withDatabase(dbNames: String*)(fs: (Statement => Unit)*): Unit = { val user = System.getProperty("user.name") val connections = fs.map { _ => DriverManager.getConnection(jdbcUri, user, "") } val statements = connections.map(_.createStatement()) @@ -857,7 +879,7 @@ abstract class HiveThriftJdbcTest extends HiveThriftServer2Test { } } - def withJdbcStatement(tableNames: String*)(f: Statement => Unit) { + def withJdbcStatement(tableNames: String*)(f: Statement => Unit): Unit = { withMultipleConnectionJdbcStatement(tableNames: _*)(f) } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala index 21870ffd463ec..f7ee3e0a46cd1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkMetadataOperationSuite.scala @@ -231,4 +231,20 @@ class SparkMetadataOperationSuite extends HiveThriftJdbcTest { assert(!rs.next()) } } + + test("GetTypeInfo Thrift API") { + def checkResult(rs: ResultSet, typeNames: Seq[String]): Unit = { + for (i <- typeNames.indices) { + assert(rs.next()) + assert(rs.getString("TYPE_NAME") === typeNames(i)) + } + // Make sure there are no more elements + assert(!rs.next()) + } + + withJdbcStatement() { statement => + val metaData = statement.getConnection.getMetaData + checkResult(metaData.getTypeInfo, ThriftserverShimUtils.supportedType().map(_.getName)) + } + } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala index f198372a4c998..10ec1ee168303 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/SparkThriftServerProtocolVersionsSuite.scala @@ -261,10 +261,10 @@ class SparkThriftServerProtocolVersionsSuite extends HiveThriftJdbcTest { } } - // We do not fully support interval type - ignore(s"$version get interval type") { + test(s"$version get interval type") { testExecuteStatementWithProtocolVersion(version, "SELECT interval '1' year '2' day") { rs => assert(rs.next()) + assert(rs.getString(1) === "interval 1 years 2 days") } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 1f7b3feae47b5..613c1655727bb 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -18,19 +18,21 @@ package org.apache.spark.sql.hive.thriftserver import java.io.File -import java.sql.{DriverManager, SQLException, Statement, Timestamp} -import java.util.Locale +import java.sql.{DriverManager, Statement, Timestamp} +import java.util.{Locale, MissingFormatArgumentException} import scala.util.{Random, Try} import scala.util.control.NonFatal +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hive.service.cli.HiveSQLException -import org.scalatest.Ignore +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.{AnalysisException, SQLQueryTestSuite} +import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.util.fileToString import org.apache.spark.sql.execution.HiveResult +import org.apache.spark.sql.hive.HiveUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -43,12 +45,12 @@ import org.apache.spark.sql.types._ * 2. Support DESC command. * 3. Support SHOW command. */ -@Ignore class ThriftServerQueryTestSuite extends SQLQueryTestSuite { private var hiveServer2: HiveThriftServer2 = _ - override def beforeEach(): Unit = { + override def beforeAll(): Unit = { + super.beforeAll() // Chooses a random port between 10000 and 19999 var listeningPort = 10000 + Random.nextInt(10000) @@ -65,36 +67,40 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { logInfo("HiveThriftServer2 started successfully") } - override def afterEach(): Unit = { - hiveServer2.stop() + override def afterAll(): Unit = { + try { + hiveServer2.stop() + } finally { + super.afterAll() + } } + override def sparkConf: SparkConf = super.sparkConf + // Hive Thrift server should not executes SQL queries in an asynchronous way + // because we may set session configuration. + .set(HiveUtils.HIVE_THRIFT_SERVER_ASYNC, false) + override val isTestWithConfigSets = false /** List of test cases to ignore, in lower cases. */ override def blackList: Set[String] = Set( "blacklist.sql", // Do NOT remove this one. It is here to test the blacklist functionality. // Missing UDF - "pgSQL/boolean.sql", - "pgSQL/case.sql", + "postgreSQL/boolean.sql", + "postgreSQL/case.sql", // SPARK-28624 "date.sql", - // SPARK-28619 - "pgSQL/aggregates_part1.sql", - "group-by.sql", // SPARK-28620 - "pgSQL/float4.sql", + "postgreSQL/float4.sql", // SPARK-28636 "decimalArithmeticOperations.sql", "literals.sql", "subquery/scalar-subquery/scalar-subquery-predicate.sql", "subquery/in-subquery/in-limit.sql", + "subquery/in-subquery/in-group-by.sql", "subquery/in-subquery/simple-in.sql", "subquery/in-subquery/in-order-by.sql", - "subquery/in-subquery/in-set-operations.sql", - // SPARK-28637 - "cast.sql", - "ansi/interval.sql" + "subquery/in-subquery/in-set-operations.sql" ) override def runQueries( @@ -110,8 +116,8 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { case _: PgSQLTest => // PostgreSQL enabled cartesian product by default. 
statement.execute(s"SET ${SQLConf.CROSS_JOINS_ENABLED.key} = true") - statement.execute(s"SET ${SQLConf.ANSI_SQL_PARSER.key} = true") - statement.execute(s"SET ${SQLConf.PREFER_INTEGRAL_DIVISION.key} = true") + statement.execute(s"SET ${SQLConf.ANSI_ENABLED.key} = true") + statement.execute(s"SET ${SQLConf.DIALECT.key} = ${SQLConf.Dialect.POSTGRESQL.toString}") case _ => } @@ -166,19 +172,42 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { || d.sql.toUpperCase(Locale.ROOT).startsWith("DESC\n") || d.sql.toUpperCase(Locale.ROOT).startsWith("DESCRIBE ") || d.sql.toUpperCase(Locale.ROOT).startsWith("DESCRIBE\n") => + // Skip show command, see HiveResult.hiveResultString case s if s.sql.toUpperCase(Locale.ROOT).startsWith("SHOW ") || s.sql.toUpperCase(Locale.ROOT).startsWith("SHOW\n") => - // AnalysisException should exactly match. - // SQLException should not exactly match. We only assert the result contains Exception. - case _ if output.output.startsWith(classOf[SQLException].getName) => + + case _ if output.output.startsWith(classOf[NoSuchTableException].getPackage.getName) => + assert(expected.output.startsWith(classOf[NoSuchTableException].getPackage.getName), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[SparkException].getName) && + output.output.contains("overflow") => + assert(expected.output.contains(classOf[ArithmeticException].getName) && + expected.output.contains("overflow"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[RuntimeException].getName) => assert(expected.output.contains("Exception"), s"Exception did not match for query #$i\n${expected.sql}, " + s"expected: ${expected.output}, but got: ${output.output}") - // HiveSQLException is usually a feature that our ThriftServer cannot support. - // Please add SQL to blackList. 
- case _ if output.output.startsWith(classOf[HiveSQLException].getName) => - assert(false, s"${output.output} for query #$i\n${expected.sql}") + + case _ if output.output.startsWith(classOf[ArithmeticException].getName) && + output.output.contains("causes overflow") => + assert(expected.output.contains(classOf[ArithmeticException].getName) && + expected.output.contains("causes overflow"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + + case _ if output.output.startsWith(classOf[MissingFormatArgumentException].getName) && + output.output.contains("Format specifier") => + assert(expected.output.contains(classOf[MissingFormatArgumentException].getName) && + expected.output.contains("Format specifier"), + s"Exception did not match for query #$i\n${expected.sql}, " + + s"expected: ${expected.output}, but got: ${output.output}") + case _ => assertResult(expected.output, s"Result did not match for query #$i\n${expected.sql}") { output.output @@ -209,7 +238,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}udf")) { Seq.empty - } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}pgSQL")) { + } else if (file.getAbsolutePath.startsWith(s"$inputFilePath${File.separator}postgreSQL")) { PgSQLTestCase(testCaseName, absPath, resultFile) :: Nil } else { RegularTestCase(testCaseName, absPath, resultFile) :: Nil @@ -248,8 +277,9 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")).sorted case NonFatal(e) => + val rootCause = ExceptionUtils.getRootCause(e) // If there is an exception, put the exception class followed by the message. 
- Seq(e.getClass.getName, e.getMessage) + Seq(rootCause.getClass.getName, rootCause.getMessage) } } @@ -260,7 +290,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { hiveServer2 = HiveThriftServer2.startWithContext(sqlContext) } - private def withJdbcStatement(fs: (Statement => Unit)*) { + private def withJdbcStatement(fs: (Statement => Unit)*): Unit = { val user = System.getProperty("user.name") val serverPort = hiveServer2.getHiveConf.get(ConfVars.HIVE_SERVER2_THRIFT_PORT.varname) @@ -337,7 +367,7 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite { upperCase.startsWith("SELECT ") || upperCase.startsWith("SELECT\n") || upperCase.startsWith("WITH ") || upperCase.startsWith("WITH\n") || upperCase.startsWith("VALUES ") || upperCase.startsWith("VALUES\n") || - // pgSQL/union.sql + // postgreSQL/union.sql upperCase.startsWith("(") } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index 47cf4f104d204..7f731f3d05e51 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -24,8 +24,8 @@ import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ -import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ +import org.scalatestplus.selenium.WebBrowser import org.apache.spark.ui.SparkUICssErrorHandler diff --git a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java index 0f72071d7e7d1..3e81f8afbd85f 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ b/sql/hive-thriftserver/v1.2.1/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java @@ -73,7 +73,7 @@ public class GetTypeInfoOperation extends MetadataOperation { .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, "Usually 2 or 10"); - private final RowSet rowSet; + protected final RowSet rowSet; protected GetTypeInfoOperation(HiveSession parentSession) { super(parentSession, OperationType.GET_TYPE_INFO); diff --git a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 87c0f8f6a571a..fbfc698ecb4bf 100644 --- a/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v1.2.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.hive.thriftserver import org.apache.commons.logging.LogFactory -import org.apache.hadoop.hive.ql.exec.Utilities import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema, Type} +import org.apache.hive.service.cli.Type._ import org.apache.hive.service.cli.thrift.TProtocolVersion._ /** @@ -51,10 +51,12 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def 
toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - Utilities.addToClassPath(loader, auxJars) + private[thriftserver] def supportedType(): Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } private[thriftserver] val testedProtocolVersions = Seq( diff --git a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java b/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java index 9612eb145638c..0f57a72e2a1ce 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java +++ b/sql/hive-thriftserver/v2.3.5/src/main/java/org/apache/hive/service/cli/operation/GetTypeInfoOperation.java @@ -73,7 +73,7 @@ public class GetTypeInfoOperation extends MetadataOperation { .addPrimitiveColumn("NUM_PREC_RADIX", Type.INT_TYPE, "Usually 2 or 10"); - private final RowSet rowSet; + protected final RowSet rowSet; protected GetTypeInfoOperation(HiveSession parentSession) { super(parentSession, OperationType.GET_TYPE_INFO); diff --git a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala index 124c9937c0fca..850382fe2bfd7 100644 --- a/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala +++ b/sql/hive-thriftserver/v2.3.5/src/main/scala/org/apache/spark/sql/hive/thriftserver/ThriftserverShimUtils.scala @@ -17,13 +17,9 @@ package org.apache.spark.sql.hive.thriftserver -import java.security.AccessController - -import scala.collection.JavaConverters._ - -import org.apache.hadoop.hive.ql.exec.AddToClassPathAction import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde2.thrift.Type +import org.apache.hadoop.hive.serde2.thrift.Type._ import org.apache.hive.service.cli.{RowSet, RowSetFactory, TableSchema} import org.apache.hive.service.rpc.thrift.TProtocolVersion._ import org.slf4j.LoggerFactory @@ -56,11 +52,12 @@ private[thriftserver] object ThriftserverShimUtils { private[thriftserver] def toJavaSQLType(s: String): Int = Type.getType(s).toJavaSQLType - private[thriftserver] def addToClassPath( - loader: ClassLoader, - auxJars: Array[String]): ClassLoader = { - val addAction = new AddToClassPathAction(loader, auxJars.toList.asJava) - AccessController.doPrivileged(addAction) + private[thriftserver] def supportedType(): Seq[Type] = { + Seq(NULL_TYPE, BOOLEAN_TYPE, STRING_TYPE, BINARY_TYPE, + TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, + FLOAT_TYPE, DOUBLE_TYPE, DECIMAL_TYPE, + DATE_TYPE, TIMESTAMP_TYPE, + ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE) } private[thriftserver] val testedProtocolVersions = Seq( diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt index f3044da972497..0c394a340333a 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF 
================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -hive udaf vs spark af: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -hive udaf w/o group by 6370 / 6400 0.0 97193.6 1.0X -spark af w/o group by 54 / 63 1.2 820.8 118.4X -hive udaf w/ group by 4492 / 4507 0.0 68539.5 1.4X -spark af w/ group by w/o fallback 58 / 64 1.1 881.7 110.2X -spark af w/ group by w/ fallback 136 / 142 0.5 2075.0 46.8X +hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +hive udaf w/o group by 6741 6759 22 0.0 102864.5 1.0X +spark af w/o group by 56 66 9 1.2 851.6 120.8X +hive udaf w/ group by 4610 4642 25 0.0 70350.3 1.5X +spark af w/ group by w/o fallback 60 67 8 1.1 916.7 112.2X +spark af w/ group by w/ fallback 135 144 9 0.5 2065.6 49.8X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 41500 / 41630 2.5 395.8 1.0X -object agg w/ group by w/o fallback 10075 / 10122 10.4 96.1 4.1X -object agg w/ group by w/ fallback 28131 / 28205 3.7 268.3 1.5X -sort agg w/o group by 6182 / 6221 17.0 59.0 6.7X -object agg w/o group by w/o fallback 5435 / 5468 19.3 51.8 7.6X +object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 41568 41894 461 2.5 396.4 1.0X +object agg w/ group by w/o fallback 10314 10494 149 10.2 98.4 4.0X +object agg w/ group by w/ fallback 26720 26951 326 3.9 254.8 1.6X +sort agg w/o group by 6638 6681 38 15.8 63.3 6.3X +object agg w/o group by w/o fallback 5665 5706 30 18.5 54.0 7.3X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -object agg v.s. 
sort agg: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -sort agg w/ group by 970 / 1025 2.2 462.5 1.0X -object agg w/ group by w/o fallback 772 / 798 2.7 368.1 1.3X -object agg w/ group by w/ fallback 1013 / 1044 2.1 483.1 1.0X -sort agg w/o group by 751 / 781 2.8 358.0 1.3X -object agg w/o group by w/o fallback 772 / 814 2.7 368.0 1.3X +object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +sort agg w/ group by 794 862 33 2.6 378.8 1.0X +object agg w/ group by w/o fallback 605 622 10 3.5 288.5 1.3X +object agg w/ group by w/ fallback 840 860 15 2.5 400.5 0.9X +sort agg w/o group by 555 570 12 3.8 264.6 1.4X +object agg w/o group by w/o fallback 544 562 12 3.9 259.6 1.5X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index caa78b9a8f102..c47cf27bf617a 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,155 +2,155 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single TINYINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1725 / 1759 9.1 109.7 1.0X -Native ORC Vectorized 272 / 316 57.8 17.3 6.3X -Hive built-in ORC 1970 / 1987 8.0 125.3 0.9X +SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1843 1958 162 8.5 117.2 1.0X +Native ORC Vectorized 321 355 31 48.9 20.4 5.7X +Hive built-in ORC 2143 2175 44 7.3 136.3 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single SMALLINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1633 / 1672 9.6 103.8 1.0X -Native ORC Vectorized 238 / 255 66.0 15.1 6.9X -Hive built-in ORC 2293 / 2305 6.9 145.8 0.7X +SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1987 2020 47 7.9 126.3 1.0X +Native ORC Vectorized 276 299 25 57.0 17.6 7.2X +Hive built-in ORC 2350 2357 10 6.7 149.4 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single INT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1677 / 1699 9.4 106.6 1.0X -Native ORC Vectorized 325 / 342 48.3 20.7 
5.2X -Hive built-in ORC 2561 / 2569 6.1 162.8 0.7X +SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2092 2115 32 7.5 133.0 1.0X +Native ORC Vectorized 360 373 18 43.6 22.9 5.8X +Hive built-in ORC 2550 2557 9 6.2 162.2 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single BIGINT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1795 8.8 113.9 1.0X -Native ORC Vectorized 400 / 408 39.3 25.4 4.5X -Hive built-in ORC 2713 / 2720 5.8 172.5 0.7X +SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2173 2188 21 7.2 138.2 1.0X +Native ORC Vectorized 435 448 14 36.2 27.7 5.0X +Hive built-in ORC 2683 2690 10 5.9 170.6 0.8X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single FLOAT Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1791 / 1805 8.8 113.8 1.0X -Native ORC Vectorized 433 / 438 36.3 27.5 4.1X -Hive built-in ORC 2690 / 2803 5.8 171.0 0.7X +SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2233 2323 127 7.0 142.0 1.0X +Native ORC Vectorized 475 483 13 33.1 30.2 4.7X +Hive built-in ORC 2605 2610 6 6.0 165.7 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -SQL Single DOUBLE Column Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1911 / 1930 8.2 121.5 1.0X -Native ORC Vectorized 543 / 552 29.0 34.5 3.5X -Hive built-in ORC 2967 / 3065 5.3 188.6 0.6X +SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2367 2384 24 6.6 150.5 1.0X +Native ORC Vectorized 600 641 69 26.2 38.1 3.9X +Hive built-in ORC 2860 2877 24 5.5 181.9 0.8X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Int and String Scan: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------- -Native ORC MR 4160 / 4188 2.5 396.7 1.0X -Native ORC Vectorized 2405 / 2406 4.4 229.4 1.7X -Hive built-in ORC 5514 / 5562 1.9 525.9 0.8X +Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 4253 4330 108 2.5 405.6 1.0X +Native ORC Vectorized 2295 2301 8 4.6 218.9 1.9X +Hive built-in ORC 5364 5465 144 2.0 511.5 0.8X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Partitioned Table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Data column - Native ORC MR 1863 / 1867 8.4 118.4 1.0X -Data column - Native ORC Vectorized 411 / 418 38.2 26.2 4.5X -Data column - Hive built-in ORC 3297 / 3308 4.8 209.6 0.6X -Partition column - Native ORC MR 1505 / 1506 10.4 95.7 1.2X -Partition column - Native ORC Vectorized 80 / 93 195.6 5.1 23.2X -Partition column - Hive built-in ORC 1960 / 1979 8.0 124.6 1.0X -Both columns - Native ORC MR 2076 / 2090 7.6 132.0 0.9X -Both columns - Native ORC Vectorized 450 / 463 34.9 28.6 4.1X -Both columns - Hive built-in ORC 3528 / 3548 4.5 224.3 0.5X +Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Data column - Native ORC MR 2443 2448 6 6.4 155.3 1.0X +Data column - Native ORC Vectorized 446 473 44 35.3 28.3 5.5X +Data column - Hive built-in ORC 2868 2877 12 5.5 182.4 0.9X +Partition column - Native ORC MR 1623 1656 47 9.7 103.2 1.5X +Partition column - Native ORC Vectorized 112 121 14 140.8 7.1 21.9X +Partition column - Hive built-in ORC 1846 1850 5 8.5 117.4 1.3X +Both columns - Native ORC MR 2610 2635 36 6.0 165.9 0.9X +Both columns - Native ORC Vectorized 492 508 19 32.0 31.3 5.0X +Both columns - Hive built-in ORC 2969 2973 4 5.3 188.8 0.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Repeated String: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1727 / 1733 6.1 164.7 1.0X -Native ORC Vectorized 375 / 379 28.0 35.7 4.6X -Hive built-in ORC 2665 / 2666 3.9 254.2 0.6X +Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 2056 2064 11 5.1 196.1 1.0X +Native ORC Vectorized 415 421 7 25.3 39.6 5.0X +Hive built-in ORC 2710 2722 17 3.9 258.4 0.8X 
================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (0.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3324 / 3325 3.2 317.0 1.0X -Native ORC Vectorized 1085 / 1106 9.7 103.4 3.1X -Hive built-in ORC 5272 / 5299 2.0 502.8 0.6X +String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3655 3674 27 2.9 348.6 1.0X +Native ORC Vectorized 1166 1167 1 9.0 111.2 3.1X +Hive built-in ORC 5268 5305 52 2.0 502.4 0.7X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (50.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3045 / 3046 3.4 290.4 1.0X -Native ORC Vectorized 1248 / 1260 8.4 119.0 2.4X -Hive built-in ORC 3989 / 3999 2.6 380.4 0.8X +String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 3447 3467 27 3.0 328.8 1.0X +Native ORC Vectorized 1222 1223 1 8.6 116.6 2.8X +Hive built-in ORC 3947 3959 18 2.7 376.4 0.9X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -String with Nulls Scan (95.0%): Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1692 / 1694 6.2 161.3 1.0X -Native ORC Vectorized 471 / 493 22.3 44.9 3.6X -Hive built-in ORC 2398 / 2411 4.4 228.7 0.7X +String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 1912 1917 6 5.5 182.4 1.0X +Native ORC Vectorized 477 484 5 22.0 45.5 4.0X +Hive built-in ORC 2374 2386 17 4.4 226.4 0.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 100 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 1371 / 1379 0.8 1307.5 1.0X -Native ORC Vectorized 121 / 135 8.6 115.8 11.3X -Hive built-in ORC 521 / 
561 2.0 497.1 2.6X +Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 290 350 102 3.6 276.1 1.0X +Native ORC Vectorized 155 166 15 6.7 148.2 1.9X +Hive built-in ORC 520 531 8 2.0 495.8 0.6X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 200 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 2711 / 2767 0.4 2585.5 1.0X -Native ORC Vectorized 210 / 232 5.0 200.5 12.9X -Hive built-in ORC 764 / 775 1.4 728.3 3.5X +Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 365 406 73 2.9 347.9 1.0X +Native ORC Vectorized 232 246 20 4.5 221.6 1.6X +Hive built-in ORC 794 864 62 1.3 757.6 0.5X -OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -Single Column Scan from 300 columns: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------- -Native ORC MR 3979 / 3988 0.3 3794.4 1.0X -Native ORC Vectorized 357 / 366 2.9 340.2 11.2X -Hive built-in ORC 1091 / 1095 1.0 1040.5 3.6X +Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Native ORC MR 501 544 40 2.1 477.6 1.0X +Native ORC Vectorized 365 386 33 2.9 348.0 1.4X +Hive built-in ORC 1153 1153 0 0.9 1099.8 0.4X diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index e7ff3a5f4be2b..7a9f5c67fc693 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -46,7 +46,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) } - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -65,7 +65,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { RuleExecutor.resetMetrics() } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala index c7d953a731b9b..b0cf25c3a7813 100644 --- 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveWindowFunctionQuerySuite.scala @@ -37,7 +37,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte private val originalLocale = Locale.getDefault private val testTempDir = Utils.createTempDir() - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -100,7 +100,7 @@ class HiveWindowFunctionQuerySuite extends HiveComparisonTest with BeforeAndAfte sql("set mapreduce.jobtracker.address=local") } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) @@ -751,7 +751,7 @@ class HiveWindowFunctionQueryFileSuite private val originalLocale = Locale.getDefault private val testTempDir = Utils.createTempDir() - override def beforeAll() { + override def beforeAll(): Unit = { super.beforeAll() TestHive.setCacheTables(true) // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) @@ -769,7 +769,7 @@ class HiveWindowFunctionQueryFileSuite // sql("set mapreduce.jobtracker.address=local") } - override def afterAll() { + override def afterAll(): Unit = { try { TestHive.setCacheTables(false) TimeZone.setDefault(originalTimeZone) diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index d37f0c8573659..f627227aa0380 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -103,14 +103,6 @@ ${hive.group} hive-metastore - - ${hive.group} - hive-contrib - - - ${hive.group}.hcatalog - hive-hcatalog-core -
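
Editor's note: a minimal standalone sketch (not part of the patch) of what the new GetTypeInfo support added above looks like from a plain JDBC client. It assumes a Spark Thrift server is already listening on localhost:10000 with the default database, no password, and the Hive JDBC driver (org.apache.hive.jdbc.HiveDriver) on the classpath; the object name and connection details are illustrative only. Each row returned by DatabaseMetaData.getTypeInfo corresponds to one entry of ThriftserverShimUtils.supportedType() from the shim changes above.

import java.sql.DriverManager

object GetTypeInfoExample {
  def main(args: Array[String]): Unit = {
    // Explicitly load the Hive JDBC driver in case it is not auto-registered.
    Class.forName("org.apache.hive.jdbc.HiveDriver")
    val connection = DriverManager.getConnection(
      "jdbc:hive2://localhost:10000/default", System.getProperty("user.name"), "")
    try {
      // GetTypeInfo is served by GetTypeInfoOperation on the server side.
      val typeInfo = connection.getMetaData.getTypeInfo
      while (typeInfo.next()) {
        println(typeInfo.getString("TYPE_NAME"))
      }
    } finally {
      connection.close()
    }
  }
}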
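
Editor's note: a second small sketch (again not from the patch) of the root-cause normalisation idea that ThriftServerQueryTestSuite adopts above. When a query fails over JDBC, the interesting exception is typically wrapped in several layers, so the suite compares golden output against the innermost cause's class name and message via ExceptionUtils.getRootCause. The helper below assumes commons-lang3 is on the classpath; the object and method names are illustrative, and the null check guards against getRootCause returning null when the throwable has no cause.

import org.apache.commons.lang3.exception.ExceptionUtils

object RootCauseExample {
  // Describe an exception by the class name and message of its deepest cause,
  // falling back to the exception itself when it has no cause.
  def describe(e: Throwable): Seq[String] = {
    val rootCause = Option(ExceptionUtils.getRootCause(e)).getOrElse(e)
    Seq(rootCause.getClass.getName, String.valueOf(rootCause.getMessage))
  }

  def main(args: Array[String]): Unit = {
    val wrapped = new RuntimeException("outer",
      new IllegalStateException("middle", new ArithmeticException("causes overflow")))
    // Prints: java.lang.ArithmeticException, causes overflow
    println(describe(wrapped).mkString(", "))
  }
}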