From 18ce1b82403cfb1e2b4fcac2dd5875f289d61ed3 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 2 Feb 2017 16:31:05 -0800
Subject: [PATCH 01/31] Add capability to inherit SessionState (SQL conf, temp
 tables, registered functions) when forking a new SparkSession.

---
 .../sql/catalyst/catalog/SessionCatalog.scala | 21 +++++++
 .../spark/sql/ExperimentalMethods.scala       | 21 +++++++
 .../org/apache/spark/sql/SparkSession.scala   | 17 +++--
 .../spark/sql/internal/SessionState.scala     | 63 +++++++++++++++----
 4 files changed, 105 insertions(+), 17 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index e9543f79878b7..927514edc9376 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1180,4 +1180,25 @@ class SessionCatalog(
     }
   }
 
+  /**
+    * Get an identical copy of the `SessionCatalog`.
+    * The temporary tables and function registry are retained.
+    * The table relation cache will not be populated.
+    */
+  override def clone: SessionCatalog = {
+    val catalog = new SessionCatalog(
+      externalCatalog,
+      globalTempViewManager,
+      functionResourceLoader,
+      functionRegistry,
+      conf,
+      hadoopConf,
+      parser)
+
+    catalog.currentDb = currentDb
+    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2)) // copy over temporary tables
+
+    catalog
+  }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 1e8ba51e59e33..0441b0028a000 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql
 
+import scala.collection.mutable.ListBuffer
+
 import org.apache.spark.annotation.{Experimental, InterfaceStability}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
@@ -46,4 +48,23 @@ class ExperimentalMethods private[sql]() {
 
   @volatile var extraOptimizations: Seq[Rule[LogicalPlan]] = Nil
 
+  /**
+    * Get an identical copy of this `ExperimentalMethods` instance.
+    * @note This is used when forking a `SparkSession`.
+    * `clone` is provided here instead of implementing equivalent functionality
+    * in `SparkSession.clone` since it needs to be updated
+    * as the class `ExperimentalMethods` is extended or  modified.
+    */
+  override def clone: ExperimentalMethods = {
+    def cloneSeq[T](seq: Seq[T]): Seq[T] = {
+      val newSeq = new ListBuffer[T]
+      newSeq ++= seq
+      newSeq
+    }
+
+    val result = new ExperimentalMethods
+    result.extraStrategies = cloneSeq(extraStrategies)
+    result.extraOptimizations = cloneSeq(extraOptimizations)
+    result
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index f3dde480eabe0..461af37a23819 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -72,7 +72,8 @@ import org.apache.spark.util.Utils
 @InterfaceStability.Stable
 class SparkSession private(
     @transient val sparkContext: SparkContext,
-    @transient private val existingSharedState: Option[SharedState])
+    @transient private val existingSharedState: Option[SharedState],
+    existingSessionState: Option[SessionState] = None)
   extends Serializable with Closeable with Logging { self =>
 
   private[sql] def this(sc: SparkContext) {
@@ -107,9 +108,9 @@ class SparkSession private(
    */
   @transient
   private[sql] lazy val sessionState: SessionState = {
-    SparkSession.reflect[SessionState, SparkSession](
+    existingSessionState.getOrElse(SparkSession.reflect[SessionState, SparkSession](
       SparkSession.sessionStateClassName(sparkContext.conf),
-      self)
+      self))
   }
 
   /**
@@ -201,6 +202,8 @@ class SparkSession private(
   /**
    * Start a new session with isolated SQL configurations, temporary tables, registered
    * functions are isolated, but sharing the underlying `SparkContext` and cached data.
+   * If inherit is enabled, then SQL configurations, temporary tables, registered functions
+   * are copied over from parent `SparkSession`.
    *
    * @note Other than the `SparkContext`, all shared state is initialized lazily.
    * This method will force the initialization of the shared state to ensure that parent
@@ -209,8 +212,12 @@ class SparkSession private(
    *
    * @since 2.0.0
    */
-  def newSession(): SparkSession = {
-    new SparkSession(sparkContext, Some(sharedState))
+  def newSession(inheritSessionState: Boolean = false): SparkSession = {
+    if (inheritSessionState) {
+      new SparkSession(sparkContext, Some(sharedState), Some(sessionState.clone))
+    } else {
+      new SparkSession(sparkContext, Some(sharedState))
+    }
   }
 
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 68b774b52fd7f..ebd5321d5c0ed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -38,8 +38,11 @@ import org.apache.spark.sql.util.ExecutionListenerManager
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
+ * If an `existingSessionState` is supplied, then its members will be copied over.
  */
-private[sql] class SessionState(sparkSession: SparkSession) {
+private[sql] class SessionState(
+    sparkSession: SparkSession,
+    existingSessionState: Option[SessionState] = None) {
 
   // Note: These are all lazy vals because they depend on each other (e.g. conf) and we
   // want subclasses to override some of the fields. Otherwise, we would get a lot of NPEs.
@@ -47,7 +50,15 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   /**
    * SQL-specific key-value configurations.
    */
-  lazy val conf: SQLConf = new SQLConf
+  lazy val conf: SQLConf = {
+    val result = new SQLConf
+    if (existingSessionState.nonEmpty) {
+      existingSessionState.get.conf.getAllConfs.foreach {
+        case (k, v) => if (v ne null) result.setConfString(k, v)
+      }
+    }
+    result
+  }
 
   def newHadoopConf(): Configuration = {
     val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
@@ -65,12 +76,29 @@ private[sql] class SessionState(sparkSession: SparkSession) {
     hadoopConf
   }
 
-  lazy val experimentalMethods = new ExperimentalMethods
+  lazy val experimentalMethods: ExperimentalMethods = {
+    existingSessionState
+      .map(_.experimentalMethods.clone)
+      .getOrElse(new ExperimentalMethods)
+  }
 
   /**
    * Internal catalog for managing functions registered by the user.
    */
-  lazy val functionRegistry: FunctionRegistry = FunctionRegistry.builtin.copy()
+  lazy val functionRegistry: FunctionRegistry = {
+    val registry = FunctionRegistry.builtin.copy()
+
+    if (existingSessionState.nonEmpty) {
+      val sourceRegistry = existingSessionState.get.functionRegistry
+      sourceRegistry
+        .listFunction()
+        .foreach(name => registry.registerFunction(
+          name,
+          sourceRegistry.lookupFunction(name).get,
+          sourceRegistry.lookupFunctionBuilder(name).get))
+    }
+    registry
+  }
 
   /**
    * A class for loading resources specified by a function.
@@ -93,14 +121,18 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   /**
    * Internal catalog for managing table and database states.
    */
-  lazy val catalog = new SessionCatalog(
-    sparkSession.sharedState.externalCatalog,
-    sparkSession.sharedState.globalTempViewManager,
-    functionResourceLoader,
-    functionRegistry,
-    conf,
-    newHadoopConf(),
-    sqlParser)
+  lazy val catalog: SessionCatalog = {
+    existingSessionState
+      .map(_.catalog.clone)
+      .getOrElse(new SessionCatalog(
+        sparkSession.sharedState.externalCatalog,
+        sparkSession.sharedState.globalTempViewManager,
+        functionResourceLoader,
+        functionRegistry,
+        conf,
+        newHadoopConf(),
+        sqlParser))
+  }
 
   /**
    * Interface exposed to the user for registering user-defined functions.
@@ -165,6 +197,13 @@ private[sql] class SessionState(sparkSession: SparkSession) {
     conf.setConfString(k, v)
   }
 
+  /**
+    * Get an identical copy of the `SessionState`.
+    */
+  override def clone: SessionState = {
+    new SessionState(sparkSession, Some(this))
+  }
+
   // ------------------------------------------------------
   //  Helper methods, partially leftover from pre-2.0 days
   // ------------------------------------------------------

From 9beb78d82d7777bc17aed047d818fdd4fca1b0d6 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Mon, 6 Feb 2017 14:55:56 -0800
Subject: [PATCH 02/31] Add tests for forking new session with inherit config
 enabled. Update overloaded functions for Java bytecode compatibility.

---
 .../sql/catalyst/catalog/SessionCatalog.scala |  8 +--
 .../spark/sql/ExperimentalMethods.scala       | 12 ++--
 .../org/apache/spark/sql/SparkSession.scala   | 22 ++++--
 .../spark/sql/internal/SessionState.scala     | 10 ++-
 .../spark/sql/SparkSessionBuilderSuite.scala  | 68 +++++++++++++++++++
 .../spark/sql/internal/CatalogSuite.scala     | 15 ++++
 .../apache/spark/sql/hive/test/TestHive.scala | 12 ++--
 7 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 927514edc9376..ab9ddd17b8ad5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1181,10 +1181,10 @@ class SessionCatalog(
   }
 
   /**
-    * Get an identical copy of the `SessionCatalog`.
-    * The temporary tables and function registry are retained.
-    * The table relation cache will not be populated.
-    */
+   * Get an identical copy of the `SessionCatalog`.
+   * The temporary tables and function registry are retained.
+   * The table relation cache will not be populated.
+   */
   override def clone: SessionCatalog = {
     val catalog = new SessionCatalog(
       externalCatalog,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 0441b0028a000..aac2794376631 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -49,12 +49,12 @@ class ExperimentalMethods private[sql]() {
   @volatile var extraOptimizations: Seq[Rule[LogicalPlan]] = Nil
 
   /**
-    * Get an identical copy of this `ExperimentalMethods` instance.
-    * @note This is used when forking a `SparkSession`.
-    * `clone` is provided here instead of implementing equivalent functionality
-    * in `SparkSession.clone` since it needs to be updated
-    * as the class `ExperimentalMethods` is extended or  modified.
-    */
+   * Get an identical copy of this `ExperimentalMethods` instance.
+   * @note This is used when forking a `SparkSession`.
+   * `clone` is provided here instead of implementing equivalent functionality
+   * in `SparkSession.clone` since it needs to be updated
+   * as the class `ExperimentalMethods` is extended or  modified.
+   */
   override def clone: ExperimentalMethods = {
     def cloneSeq[T](seq: Seq[T]): Seq[T] = {
       val newSeq = new ListBuffer[T]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 461af37a23819..00142c3151e54 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -44,7 +44,7 @@ import org.apache.spark.sql.internal.{CatalogImpl, SessionState, SharedState}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.streaming._
-import org.apache.spark.sql.types.{DataType, LongType, StructType}
+import org.apache.spark.sql.types.{DataType, StructType}
 import org.apache.spark.sql.util.ExecutionListenerManager
 import org.apache.spark.util.Utils
 
@@ -202,8 +202,6 @@ class SparkSession private(
   /**
    * Start a new session with isolated SQL configurations, temporary tables, registered
    * functions are isolated, but sharing the underlying `SparkContext` and cached data.
-   * If inherit is enabled, then SQL configurations, temporary tables, registered functions
-   * are copied over from parent `SparkSession`.
    *
    * @note Other than the `SparkContext`, all shared state is initialized lazily.
    * This method will force the initialization of the shared state to ensure that parent
@@ -212,11 +210,25 @@ class SparkSession private(
    *
    * @since 2.0.0
    */
-  def newSession(inheritSessionState: Boolean = false): SparkSession = {
+  def newSession(): SparkSession = {
+    new SparkSession(sparkContext, Some(sharedState))
+  }
+
+  /**
+   * Start a new session, sharing the underlying `SparkContext` and cached data.
+   * If inheritSessionState is enabled, then SQL configurations, temporary tables,
+   * registered functions are copied over from parent `SparkSession`.
+   *
+   * @note Other than the `SparkContext`, all shared state is initialized lazily.
+   * This method will force the initialization of the shared state to ensure that parent
+   * and child sessions are set up with the same shared state. If the underlying catalog
+   * implementation is Hive, this will initialize the metastore, which may take some time.
+   */
+  def newSession(inheritSessionState: Boolean): SparkSession = {
     if (inheritSessionState) {
       new SparkSession(sparkContext, Some(sharedState), Some(sessionState.clone))
     } else {
-      new SparkSession(sparkContext, Some(sharedState))
+      newSession()
     }
   }
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index ebd5321d5c0ed..a864ff5279cfe 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -42,7 +42,11 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  */
 private[sql] class SessionState(
     sparkSession: SparkSession,
-    existingSessionState: Option[SessionState] = None) {
+    existingSessionState: Option[SessionState]) {
+
+  private[sql] def this(sparkSession: SparkSession) = {
+    this(sparkSession, None)
+  }
 
   // Note: These are all lazy vals because they depend on each other (e.g. conf) and we
   // want subclasses to override some of the fields. Otherwise, we would get a lot of NPEs.
@@ -198,8 +202,8 @@ private[sql] class SessionState(
   }
 
   /**
-    * Get an identical copy of the `SessionState`.
-    */
+   * Get an identical copy of the `SessionState`.
+   */
   override def clone: SessionState = {
     new SessionState(sparkSession, Some(this))
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index 386d13d07a95f..a3d6139e727d7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -18,6 +18,8 @@
 package org.apache.spark.sql
 
 import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 
 /**
  * Test cases for the builder pattern of [[SparkSession]].
@@ -123,4 +125,70 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
       session.stop()
     }
   }
+
+  test("fork new session and inherit a copy of the session state") {
+    val activeSession = SparkSession.builder().master("local").getOrCreate()
+    val forkedSession = activeSession.newSession(inheritSessionState = true)
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState ne activeSession.sessionState)
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
+
+  test("fork new session and inherit sql config options") {
+    val activeSession = SparkSession
+      .builder()
+      .master("local")
+      .config("spark-configb", "b")
+      .getOrCreate()
+    val forkedSession = activeSession.newSession(inheritSessionState = true)
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.conf ne activeSession.conf)
+    assert(forkedSession.conf.get("spark-configb") == "b")
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
+
+  test("fork new session and inherit function registry and udf") {
+    val activeSession = SparkSession.builder().master("local").getOrCreate()
+    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
+    val forkedSession = activeSession.newSession(inheritSessionState = true)
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState.functionRegistry ne
+      activeSession.sessionState.functionRegistry)
+    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
+
+  test("fork new session and inherit experimental methods") {
+    object DummyRule1 extends Rule[LogicalPlan] {
+      def apply(p: LogicalPlan): LogicalPlan = p
+    }
+    object DummyRule2 extends Rule[LogicalPlan] {
+      def apply(p: LogicalPlan): LogicalPlan = p
+    }
+    val optimizations = List(DummyRule1, DummyRule2)
+
+    val activeSession = SparkSession.builder().master("local").getOrCreate()
+    activeSession.experimental.extraOptimizations = optimizations
+
+    val forkedSession = activeSession.newSession(inheritSessionState = true)
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.experimental ne activeSession.experimental)
+    assert(forkedSession.experimental.extraOptimizations ne
+      activeSession.experimental.extraOptimizations)
+    assert(forkedSession.experimental.extraOptimizations.toSet ==
+      activeSession.experimental.extraOptimizations.toSet)
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index 75723d0abcfcc..4df2554d4cf58 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -493,6 +493,21 @@ class CatalogSuite
     }
   }
 
+  test("clone SessionCatalog") {
+    // need to test tempTables are cloned
+    assert(spark.catalog.listTables().collect().isEmpty)
+
+    createTempTable("my_temp_table")
+    assert(spark.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
+
+    val forkedSession = spark.newSession(inheritSessionState = true)
+    assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
+
+    dropTable("my_temp_table")
+    assert(spark.catalog.listTables().collect().map(_.name).toSet == Set())
+    assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
+  }
+
   // TODO: add tests for the rest of them
 
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 3267c237c865a..4417c25f6a716 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -32,13 +32,11 @@ import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{SparkSession, SQLContext}
 import org.apache.spark.sql.catalyst.analysis._
-import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
-import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
-import org.apache.spark.sql.internal.{SharedState, SQLConf}
+import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.util.{ShutdownHookManager, Utils}
 
@@ -115,6 +113,7 @@ class TestHiveContext(
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
+    existingSessionState: Option[SessionState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
@@ -122,9 +121,14 @@ private[hive] class TestHiveSparkSession(
     this(
       sc,
       existingSharedState = None,
+      existingSessionState = None,
       loadTestTables)
   }
 
+  def this(sc: SparkContext, existingSharedState: Option[SharedState], loadTestTables: Boolean) {
+    this(sc, existingSharedState, existingSessionState = None, loadTestTables)
+  }
+
   { // set the metastore temporary configuration
     val metastoreTempConf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false) ++ Map(
       ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname -> "true",
@@ -151,7 +155,7 @@ private[hive] class TestHiveSparkSession(
     new TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
-    new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
+    new TestHiveSparkSession(sc, Some(sharedState), None, loadTestTables)
   }
 
   private var cacheTables: Boolean = false

From a343d8af9c577158042e4af9f8832f46aeecd509 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Mon, 6 Feb 2017 15:24:02 -0800
Subject: [PATCH 03/31] Fix constructor default args for bytecode
 compatibility.

---
 .../main/scala/org/apache/spark/sql/SparkSession.scala    | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 00142c3151e54..0db2bfbd56280 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -73,11 +73,15 @@ import org.apache.spark.util.Utils
 class SparkSession private(
     @transient val sparkContext: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
-    existingSessionState: Option[SessionState] = None)
+    existingSessionState: Option[SessionState])
   extends Serializable with Closeable with Logging { self =>
 
+  private[sql] def this(sc: SparkContext, existingSharedState: Option[SharedState]) {
+    this(sc, existingSharedState, None)
+  }
+
   private[sql] def this(sc: SparkContext) {
-    this(sc, None)
+    this(sc, None, None)
   }
 
   sparkContext.assertNotStopped()

From 4210079c0dfe4eb3d606606ca06ac28ccdbf2ae1 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 9 Feb 2017 18:51:58 -0800
Subject: [PATCH 04/31] Incorporate feedback. Fix association of incorrect
 SparkSession while cloning SessionState.

---
 .../catalyst/analysis/FunctionRegistry.scala  |  7 +++-
 .../spark/sql/ExperimentalMethods.scala       | 14 +++-----
 .../org/apache/spark/sql/SparkSession.scala   | 23 +++++++------
 .../apache/spark/sql/internal/SQLConf.scala   |  8 +++++
 .../spark/sql/internal/SessionState.scala     | 33 +++++--------------
 .../spark/sql/SparkSessionBuilderSuite.scala  | 13 +++-----
 .../spark/sql/internal/CatalogSuite.scala     |  2 +-
 .../apache/spark/sql/hive/test/TestHive.scala | 10 ++----
 8 files changed, 47 insertions(+), 63 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index eea3740be8a90..640997d6c4abf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -64,6 +64,8 @@ trait FunctionRegistry {
   /** Clear all registered functions. */
   def clear(): Unit
 
+  /* Create a copy of this registry with identical functions as this registry */
+  def copy(): FunctionRegistry
 }
 
 class SimpleFunctionRegistry extends FunctionRegistry {
@@ -107,7 +109,7 @@ class SimpleFunctionRegistry extends FunctionRegistry {
     functionBuilders.clear()
   }
 
-  def copy(): SimpleFunctionRegistry = synchronized {
+  override def copy(): FunctionRegistry = synchronized {
     val registry = new SimpleFunctionRegistry
     functionBuilders.iterator.foreach { case (name, (info, builder)) =>
       registry.registerFunction(name, info, builder)
@@ -150,6 +152,9 @@ object EmptyFunctionRegistry extends FunctionRegistry {
     throw new UnsupportedOperationException
   }
 
+  override def copy(): FunctionRegistry = {
+    throw new UnsupportedOperationException
+  }
 }
 
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index aac2794376631..07dac30562927 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -50,21 +50,15 @@ class ExperimentalMethods private[sql]() {
 
   /**
    * Get an identical copy of this `ExperimentalMethods` instance.
-   * @note This is used when forking a `SparkSession`.
-   * `clone` is provided here instead of implementing equivalent functionality
+   *
+   * @note `clone` is provided here instead of implementing equivalent functionality
    * in `SparkSession.clone` since it needs to be updated
    * as the class `ExperimentalMethods` is extended or  modified.
    */
   override def clone: ExperimentalMethods = {
-    def cloneSeq[T](seq: Seq[T]): Seq[T] = {
-      val newSeq = new ListBuffer[T]
-      newSeq ++= seq
-      newSeq
-    }
-
     val result = new ExperimentalMethods
-    result.extraStrategies = cloneSeq(extraStrategies)
-    result.extraOptimizations = cloneSeq(extraOptimizations)
+    result.extraStrategies = extraStrategies
+    result.extraOptimizations = extraOptimizations
     result
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 0db2bfbd56280..26f6b7935dc9e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -112,7 +112,7 @@ class SparkSession private(
    */
   @transient
   private[sql] lazy val sessionState: SessionState = {
-    existingSessionState.getOrElse(SparkSession.reflect[SessionState, SparkSession](
+    existingSessionState.map(_.clone(this)).getOrElse(SparkSession.reflect[SessionState, SparkSession](
       SparkSession.sessionStateClassName(sparkContext.conf),
       self))
   }
@@ -219,21 +219,24 @@ class SparkSession private(
   }
 
   /**
-   * Start a new session, sharing the underlying `SparkContext` and cached data.
-   * If inheritSessionState is enabled, then SQL configurations, temporary tables,
-   * registered functions are copied over from parent `SparkSession`.
+   * :: Experimental ::
+   * Create an identical copy of this `SparkSession`, sharing the underlying `SparkContext`
+   * and cached data. SessionState (SQL configurations, temporary tables, registered functions)
+   * is also copied over.
+   * Changes to base session are not propagated to cloned session, cloned is independent
+   * after creation.
    *
    * @note Other than the `SparkContext`, all shared state is initialized lazily.
    * This method will force the initialization of the shared state to ensure that parent
    * and child sessions are set up with the same shared state. If the underlying catalog
    * implementation is Hive, this will initialize the metastore, which may take some time.
+   *
+   * @since 2.1.1
    */
-  def newSession(inheritSessionState: Boolean): SparkSession = {
-    if (inheritSessionState) {
-      new SparkSession(sparkContext, Some(sharedState), Some(sessionState.clone))
-    } else {
-      newSession()
-    }
+  @Experimental
+  @InterfaceStability.Evolving
+  def cloneSession(): SparkSession = {
+    new SparkSession(sparkContext, Some(sharedState), Some(sessionState))
   }
 
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d0c86ffc27d07..7036e6bda7102 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -980,6 +980,14 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
   def clear(): Unit = {
     settings.clear()
   }
+
+  override def clone: SQLConf = {
+    val result = new SQLConf
+    getAllConfs.foreach {
+      case(k, v) => if (v ne null) result.setConfString(k, v)
+    }
+    result
+  }
 }
 
 /**
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index a864ff5279cfe..b861ed2c9e27a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -42,7 +42,7 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  */
 private[sql] class SessionState(
     sparkSession: SparkSession,
-    existingSessionState: Option[SessionState]) {
+    parentSessionState: Option[SessionState]) {
 
   private[sql] def this(sparkSession: SparkSession) = {
     this(sparkSession, None)
@@ -55,13 +55,7 @@ private[sql] class SessionState(
    * SQL-specific key-value configurations.
    */
   lazy val conf: SQLConf = {
-    val result = new SQLConf
-    if (existingSessionState.nonEmpty) {
-      existingSessionState.get.conf.getAllConfs.foreach {
-        case (k, v) => if (v ne null) result.setConfString(k, v)
-      }
-    }
-    result
+    parentSessionState.map(_.conf.clone).getOrElse(new SQLConf)
   }
 
   def newHadoopConf(): Configuration = {
@@ -81,7 +75,7 @@ private[sql] class SessionState(
   }
 
   lazy val experimentalMethods: ExperimentalMethods = {
-    existingSessionState
+    parentSessionState
       .map(_.experimentalMethods.clone)
       .getOrElse(new ExperimentalMethods)
   }
@@ -90,18 +84,7 @@ private[sql] class SessionState(
    * Internal catalog for managing functions registered by the user.
    */
   lazy val functionRegistry: FunctionRegistry = {
-    val registry = FunctionRegistry.builtin.copy()
-
-    if (existingSessionState.nonEmpty) {
-      val sourceRegistry = existingSessionState.get.functionRegistry
-      sourceRegistry
-        .listFunction()
-        .foreach(name => registry.registerFunction(
-          name,
-          sourceRegistry.lookupFunction(name).get,
-          sourceRegistry.lookupFunctionBuilder(name).get))
-    }
-    registry
+    parentSessionState.map(_.functionRegistry.copy()).getOrElse(FunctionRegistry.builtin.copy())
   }
 
   /**
@@ -126,7 +109,7 @@ private[sql] class SessionState(
    * Internal catalog for managing table and database states.
    */
   lazy val catalog: SessionCatalog = {
-    existingSessionState
+    parentSessionState
       .map(_.catalog.clone)
       .getOrElse(new SessionCatalog(
         sparkSession.sharedState.externalCatalog,
@@ -202,10 +185,10 @@ private[sql] class SessionState(
   }
 
   /**
-   * Get an identical copy of the `SessionState`.
+   * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  override def clone: SessionState = {
-    new SessionState(sparkSession, Some(this))
+  def clone(sc: SparkSession): SessionState = {
+    new SessionState(sc, Some(this))
   }
 
   // ------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index a3d6139e727d7..d4f557c7a750a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -128,7 +128,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
 
   test("fork new session and inherit a copy of the session state") {
     val activeSession = SparkSession.builder().master("local").getOrCreate()
-    val forkedSession = activeSession.newSession(inheritSessionState = true)
+    val forkedSession = activeSession.cloneSession()
 
     assert(forkedSession ne activeSession)
     assert(forkedSession.sessionState ne activeSession.sessionState)
@@ -141,9 +141,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
     val activeSession = SparkSession
       .builder()
       .master("local")
-      .config("spark-configb", "b")
       .getOrCreate()
-    val forkedSession = activeSession.newSession(inheritSessionState = true)
+    activeSession.conf.set("spark-configb", "b")
+    val forkedSession = activeSession.cloneSession()
 
     assert(forkedSession ne activeSession)
     assert(forkedSession.conf ne activeSession.conf)
@@ -156,7 +156,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
   test("fork new session and inherit function registry and udf") {
     val activeSession = SparkSession.builder().master("local").getOrCreate()
     activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
-    val forkedSession = activeSession.newSession(inheritSessionState = true)
+    val forkedSession = activeSession.cloneSession()
 
     assert(forkedSession ne activeSession)
     assert(forkedSession.sessionState.functionRegistry ne
@@ -179,12 +179,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
     val activeSession = SparkSession.builder().master("local").getOrCreate()
     activeSession.experimental.extraOptimizations = optimizations
 
-    val forkedSession = activeSession.newSession(inheritSessionState = true)
+    val forkedSession = activeSession.cloneSession()
 
     assert(forkedSession ne activeSession)
-    assert(forkedSession.experimental ne activeSession.experimental)
-    assert(forkedSession.experimental.extraOptimizations ne
-      activeSession.experimental.extraOptimizations)
     assert(forkedSession.experimental.extraOptimizations.toSet ==
       activeSession.experimental.extraOptimizations.toSet)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index 4df2554d4cf58..fc481e45c426f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -500,7 +500,7 @@ class CatalogSuite
     createTempTable("my_temp_table")
     assert(spark.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
-    val forkedSession = spark.newSession(inheritSessionState = true)
+    val forkedSession = spark.cloneSession()
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
     dropTable("my_temp_table")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 4417c25f6a716..c48f5e464944e 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
-import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
+import org.apache.spark.sql.internal.{SharedState, SQLConf}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.util.{ShutdownHookManager, Utils}
 
@@ -113,7 +113,6 @@ class TestHiveContext(
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
-    existingSessionState: Option[SessionState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
@@ -121,14 +120,9 @@ private[hive] class TestHiveSparkSession(
     this(
       sc,
       existingSharedState = None,
-      existingSessionState = None,
       loadTestTables)
   }
 
-  def this(sc: SparkContext, existingSharedState: Option[SharedState], loadTestTables: Boolean) {
-    this(sc, existingSharedState, existingSessionState = None, loadTestTables)
-  }
-
   { // set the metastore temporary configuration
     val metastoreTempConf = HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false) ++ Map(
       ConfVars.METASTORE_INTEGER_JDO_PUSHDOWN.varname -> "true",
@@ -155,7 +149,7 @@ private[hive] class TestHiveSparkSession(
     new TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
-    new TestHiveSparkSession(sc, Some(sharedState), None, loadTestTables)
+    new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
   }
 
   private var cacheTables: Boolean = false

From 6da6bdadc526d71ce887b0bcbef57272a713d6b2 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Fri, 10 Feb 2017 15:45:33 -0800
Subject: [PATCH 05/31] Update spark version. Rename clone to copy, in order to
 avoid Java Object.clone() issues.

---
 .../spark/sql/catalyst/analysis/FunctionRegistry.scala |  8 ++++----
 .../spark/sql/catalyst/catalog/SessionCatalog.scala    |  5 +++--
 .../org/apache/spark/sql/ExperimentalMethods.scala     |  9 +--------
 .../main/scala/org/apache/spark/sql/SparkSession.scala | 10 ++++++----
 .../scala/org/apache/spark/sql/internal/SQLConf.scala  |  2 +-
 .../org/apache/spark/sql/internal/SessionState.scala   | 10 +++++-----
 6 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 640997d6c4abf..0951f3a4752ba 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -64,8 +64,8 @@ trait FunctionRegistry {
   /** Clear all registered functions. */
   def clear(): Unit
 
-  /* Create a copy of this registry with identical functions as this registry */
-  def copy(): FunctionRegistry
+  /** Create a copy of this registry with identical functions as this registry. */
+  def copy: FunctionRegistry
 }
 
 class SimpleFunctionRegistry extends FunctionRegistry {
@@ -109,7 +109,7 @@ class SimpleFunctionRegistry extends FunctionRegistry {
     functionBuilders.clear()
   }
 
-  override def copy(): FunctionRegistry = synchronized {
+  override def copy: FunctionRegistry = synchronized {
     val registry = new SimpleFunctionRegistry
     functionBuilders.iterator.foreach { case (name, (info, builder)) =>
       registry.registerFunction(name, info, builder)
@@ -152,7 +152,7 @@ object EmptyFunctionRegistry extends FunctionRegistry {
     throw new UnsupportedOperationException
   }
 
-  override def copy(): FunctionRegistry = {
+  override def copy: FunctionRegistry = {
     throw new UnsupportedOperationException
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index ab9ddd17b8ad5..4c36a923a7cc9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1185,7 +1185,7 @@ class SessionCatalog(
    * The temporary tables and function registry are retained.
    * The table relation cache will not be populated.
    */
-  override def clone: SessionCatalog = {
+  def copy: SessionCatalog = {
     val catalog = new SessionCatalog(
       externalCatalog,
       globalTempViewManager,
@@ -1196,7 +1196,8 @@ class SessionCatalog(
       parser)
 
     catalog.currentDb = currentDb
-    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2)) // copy over temporary tables
+    // copy over temporary tables
+    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
 
     catalog
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 07dac30562927..3909e2f1a47f2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -48,14 +48,7 @@ class ExperimentalMethods private[sql]() {
 
   @volatile var extraOptimizations: Seq[Rule[LogicalPlan]] = Nil
 
-  /**
-   * Get an identical copy of this `ExperimentalMethods` instance.
-   *
-   * @note `clone` is provided here instead of implementing equivalent functionality
-   * in `SparkSession.clone` since it needs to be updated
-   * as the class `ExperimentalMethods` is extended or  modified.
-   */
-  override def clone: ExperimentalMethods = {
+  def copy: ExperimentalMethods = {
     val result = new ExperimentalMethods
     result.extraStrategies = extraStrategies
     result.extraOptimizations = extraOptimizations
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 26f6b7935dc9e..f0ddff1e2a4ed 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -112,9 +112,11 @@ class SparkSession private(
    */
   @transient
   private[sql] lazy val sessionState: SessionState = {
-    existingSessionState.map(_.clone(this)).getOrElse(SparkSession.reflect[SessionState, SparkSession](
-      SparkSession.sessionStateClassName(sparkContext.conf),
-      self))
+    existingSessionState
+      .map(_.copy(this))
+      .getOrElse(SparkSession.reflect[SessionState, SparkSession](
+        SparkSession.sessionStateClassName(sparkContext.conf),
+        self))
   }
 
   /**
@@ -231,7 +233,7 @@ class SparkSession private(
    * and child sessions are set up with the same shared state. If the underlying catalog
    * implementation is Hive, this will initialize the metastore, which may take some time.
    *
-   * @since 2.1.1
+   * @since 2.2.0
    */
   @Experimental
   @InterfaceStability.Evolving
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 7036e6bda7102..b18694eb05867 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -981,7 +981,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
     settings.clear()
   }
 
-  override def clone: SQLConf = {
+  def copy: SQLConf = {
     val result = new SQLConf
     getAllConfs.foreach {
       case(k, v) => if (v ne null) result.setConfString(k, v)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index b861ed2c9e27a..d4e483645afca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -55,7 +55,7 @@ private[sql] class SessionState(
    * SQL-specific key-value configurations.
    */
   lazy val conf: SQLConf = {
-    parentSessionState.map(_.conf.clone).getOrElse(new SQLConf)
+    parentSessionState.map(_.conf.copy).getOrElse(new SQLConf)
   }
 
   def newHadoopConf(): Configuration = {
@@ -76,7 +76,7 @@ private[sql] class SessionState(
 
   lazy val experimentalMethods: ExperimentalMethods = {
     parentSessionState
-      .map(_.experimentalMethods.clone)
+      .map(_.experimentalMethods.copy)
       .getOrElse(new ExperimentalMethods)
   }
 
@@ -84,7 +84,7 @@ private[sql] class SessionState(
    * Internal catalog for managing functions registered by the user.
    */
   lazy val functionRegistry: FunctionRegistry = {
-    parentSessionState.map(_.functionRegistry.copy()).getOrElse(FunctionRegistry.builtin.copy())
+    parentSessionState.map(_.functionRegistry.copy).getOrElse(FunctionRegistry.builtin.copy)
   }
 
   /**
@@ -110,7 +110,7 @@ private[sql] class SessionState(
    */
   lazy val catalog: SessionCatalog = {
     parentSessionState
-      .map(_.catalog.clone)
+      .map(_.catalog.copy)
       .getOrElse(new SessionCatalog(
         sparkSession.sharedState.externalCatalog,
         sparkSession.sharedState.globalTempViewManager,
@@ -187,7 +187,7 @@ private[sql] class SessionState(
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  def clone(sc: SparkSession): SessionState = {
+  def copy(sc: SparkSession): SessionState = {
     new SessionState(sc, Some(this))
   }
 

From 579d0b77738e2de53c06725e55f6a0de905325a5 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Tue, 14 Feb 2017 13:33:47 -0800
Subject: [PATCH 06/31] Make lazy vals strict.

---
 .../spark/sql/internal/SessionState.scala     | 31 ++++++++++---------
 .../spark/sql/test/TestSQLContext.scala       | 10 +++---
 .../spark/sql/hive/HiveSessionState.scala     | 24 ++++++++++----
 3 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index d4e483645afca..f80555946bb8c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -48,13 +48,14 @@ private[sql] class SessionState(
     this(sparkSession, None)
   }
 
-  // Note: These are all lazy vals because they depend on each other (e.g. conf) and we
-  // want subclasses to override some of the fields. Otherwise, we would get a lot of NPEs.
+  // Note: Many of these vals depend on each other (e.g. conf) and should be initialized
+  // with an early initializer if we want subclasses to override some of the fields.
+  // Otherwise, we would get a lot of NPEs.
 
   /**
    * SQL-specific key-value configurations.
    */
-  lazy val conf: SQLConf = {
+  val conf: SQLConf = {
     parentSessionState.map(_.conf.copy).getOrElse(new SQLConf)
   }
 
@@ -74,7 +75,7 @@ private[sql] class SessionState(
     hadoopConf
   }
 
-  lazy val experimentalMethods: ExperimentalMethods = {
+  val experimentalMethods: ExperimentalMethods = {
     parentSessionState
       .map(_.experimentalMethods.copy)
       .getOrElse(new ExperimentalMethods)
@@ -83,14 +84,14 @@ private[sql] class SessionState(
   /**
    * Internal catalog for managing functions registered by the user.
    */
-  lazy val functionRegistry: FunctionRegistry = {
+  val functionRegistry: FunctionRegistry = {
     parentSessionState.map(_.functionRegistry.copy).getOrElse(FunctionRegistry.builtin.copy)
   }
 
   /**
    * A class for loading resources specified by a function.
    */
-  lazy val functionResourceLoader: FunctionResourceLoader = {
+  val functionResourceLoader: FunctionResourceLoader = {
     new FunctionResourceLoader {
       override def loadResource(resource: FunctionResource): Unit = {
         resource.resourceType match {
@@ -108,7 +109,7 @@ private[sql] class SessionState(
   /**
    * Internal catalog for managing table and database states.
    */
-  lazy val catalog: SessionCatalog = {
+  val catalog: SessionCatalog = {
     parentSessionState
       .map(_.catalog.copy)
       .getOrElse(new SessionCatalog(
@@ -125,12 +126,12 @@ private[sql] class SessionState(
    * Interface exposed to the user for registering user-defined functions.
    * Note that the user-defined functions must be deterministic.
    */
-  lazy val udf: UDFRegistration = new UDFRegistration(functionRegistry)
+  val udf: UDFRegistration = new UDFRegistration(functionRegistry)
 
   /**
    * Logical query plan analyzer for resolving unresolved attributes and relations.
    */
-  lazy val analyzer: Analyzer = {
+  val analyzer: Analyzer = {
     new Analyzer(catalog, conf) {
       override val extendedResolutionRules =
         new FindDataSourceTable(sparkSession) ::
@@ -149,12 +150,12 @@ private[sql] class SessionState(
   /**
    * Logical query plan optimizer.
    */
-  lazy val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
+  val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
   /**
    * Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
    */
-  lazy val sqlParser: ParserInterface = new SparkSqlParser(conf)
+  val sqlParser: ParserInterface = new SparkSqlParser(conf)
 
   /**
    * Planner that converts optimized logical plans to physical plans.
@@ -166,12 +167,12 @@ private[sql] class SessionState(
    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
    * that listen for execution metrics.
    */
-  lazy val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
+  val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
 
   /**
    * Interface to start and stop [[StreamingQuery]]s.
    */
-  lazy val streamingQueryManager: StreamingQueryManager = {
+  val streamingQueryManager: StreamingQueryManager = {
     new StreamingQueryManager(sparkSession)
   }
 
@@ -187,8 +188,8 @@ private[sql] class SessionState(
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  def copy(sc: SparkSession): SessionState = {
-    new SessionState(sc, Some(this))
+  def copy(associatedSparkSession: SparkSession): SessionState = {
+    new SessionState(associatedSparkSession, Some(this))
   }
 
   // ------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index 2f247ca3e8b7f..ca619333674be 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -34,9 +34,8 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
     this(new SparkConf)
   }
 
-  @transient
-  protected[sql] override lazy val sessionState: SessionState = new SessionState(self) {
-    override lazy val conf: SQLConf = {
+  class TestSessionState(sparkSession: SparkSession) extends {
+    override val conf: SQLConf = {
       new SQLConf {
         clear()
         override def clear(): Unit = {
@@ -46,7 +45,10 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
         }
       }
     }
-  }
+  } with SessionState(sparkSession)
+
+  @transient
+  protected[sql] override lazy val sessionState: SessionState = new TestSessionState(this)
 
   // Needed for Java tests
   def loadTestData(): Unit = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 9fd03ef8ba037..5a4057173dfc6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -28,21 +28,29 @@ import org.apache.spark.sql.internal.SessionState
 /**
  * A class that holds all session-specific state in a given [[SparkSession]] backed by Hive.
  */
-private[hive] class HiveSessionState(sparkSession: SparkSession)
-  extends SessionState(sparkSession) {
+private[hive] class HiveSessionState(
+    sparkSession: SparkSession,
+    parentHiveSessionState: Option[HiveSessionState])
+  extends SessionState(sparkSession, parentHiveSessionState) {
 
   self =>
 
+  private[hive] def this(associatedSparkSession: SparkSession) = {
+    this(associatedSparkSession, None)
+  }
+
   /**
    * A Hive client used for interacting with the metastore.
    */
-  lazy val metadataHive: HiveClient =
-    sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client.newSession()
+  val metadataHive: HiveClient =
+    parentHiveSessionState.map(_.metadataHive).getOrElse(
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+        .newSession())
 
   /**
    * Internal catalog for managing table and database states.
    */
-  override lazy val catalog = {
+  override val catalog = {
     new HiveSessionCatalog(
       sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
       sparkSession.sharedState.globalTempViewManager,
@@ -57,7 +65,7 @@ private[hive] class HiveSessionState(sparkSession: SparkSession)
   /**
    * An analyzer that uses the Hive metastore.
    */
-  override lazy val analyzer: Analyzer = {
+  override val analyzer: Analyzer = {
     new Analyzer(catalog, conf) {
       override val extendedResolutionRules =
         catalog.ParquetConversions ::
@@ -147,4 +155,8 @@ private[hive] class HiveSessionState(sparkSession: SparkSession)
     conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
   }
 
+  override def copy(associatedSparkSession: SparkSession): HiveSessionState = {
+    new HiveSessionState(associatedSparkSession, Some(this.asInstanceOf[HiveSessionState]))
+  }
+
 }

From 2837e73e00374fe20c2713138815cb539468996f Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 15 Feb 2017 16:28:54 -0800
Subject: [PATCH 07/31] Refactor SessionState to remove passing of base
 SessionState, and initialize all fields directly instead. Same change for
 HiveSessionState.

---
 .../org/apache/spark/sql/SparkSession.scala   |  17 +-
 .../spark/sql/internal/SessionState.scala     | 245 +++++++++---------
 .../spark/sql/test/TestSQLContext.scala       |  43 ++-
 .../spark/sql/hive/HiveMetastoreCatalog.scala |   4 +-
 .../spark/sql/hive/HiveSessionCatalog.scala   |  18 ++
 .../spark/sql/hive/HiveSessionState.scala     | 110 +++++---
 .../apache/spark/sql/hive/test/TestHive.scala |  50 +++-
 7 files changed, 313 insertions(+), 174 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index f0ddff1e2a4ed..01e59de199e66 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -22,7 +22,6 @@ import java.io.Closeable
 import java.util.concurrent.atomic.AtomicReference
 
 import scala.collection.JavaConverters._
-import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.TypeTag
 import scala.util.control.NonFatal
 
@@ -114,7 +113,7 @@ class SparkSession private(
   private[sql] lazy val sessionState: SessionState = {
     existingSessionState
       .map(_.copy(this))
-      .getOrElse(SparkSession.reflect[SessionState, SparkSession](
+      .getOrElse(SparkSession.instantiateSessionState(
         SparkSession.sessionStateClassName(sparkContext.conf),
         self))
   }
@@ -994,16 +993,18 @@ object SparkSession {
   }
 
   /**
-   * Helper method to create an instance of [[T]] using a single-arg constructor that
-   * accepts an [[Arg]].
+   * Helper method to create an instance of `SessionState`
+   * The result is either `SessionState` or `HiveSessionState`
    */
-  private def reflect[T, Arg <: AnyRef](
+  private def instantiateSessionState(
       className: String,
-      ctorArg: Arg)(implicit ctorArgTag: ClassTag[Arg]): T = {
+      sparkSession: SparkSession): SessionState = {
+
     try {
+      // get `SessionState.apply(SparkSession)`
       val clazz = Utils.classForName(className)
-      val ctor = clazz.getDeclaredConstructor(ctorArgTag.runtimeClass)
-      ctor.newInstance(ctorArg).asInstanceOf[T]
+      val method = clazz.getMethod("apply", sparkSession.getClass)
+      method.invoke(null, sparkSession).asInstanceOf[SessionState]
     } catch {
       case NonFatal(e) =>
         throw new IllegalArgumentException(s"Error while instantiating '$className':", e)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index f80555946bb8c..2148ba45d85e6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -32,105 +32,28 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.command.AnalyzeTableCommand
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.streaming.{StreamingQuery, StreamingQueryManager}
+import org.apache.spark.sql.streaming.{StreamingQueryManager}
 import org.apache.spark.sql.util.ExecutionListenerManager
 
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
- * If an `existingSessionState` is supplied, then its members will be copied over.
  */
 private[sql] class SessionState(
     sparkSession: SparkSession,
-    parentSessionState: Option[SessionState]) {
+    val conf: SQLConf,
+    val experimentalMethods: ExperimentalMethods,
+    val functionRegistry: FunctionRegistry,
+    val catalog: SessionCatalog,
+    val sqlParser: ParserInterface) {
 
-  private[sql] def this(sparkSession: SparkSession) = {
-    this(sparkSession, None)
-  }
-
-  // Note: Many of these vals depend on each other (e.g. conf) and should be initialized
-  // with an early initializer if we want subclasses to override some of the fields.
-  // Otherwise, we would get a lot of NPEs.
-
-  /**
-   * SQL-specific key-value configurations.
-   */
-  val conf: SQLConf = {
-    parentSessionState.map(_.conf.copy).getOrElse(new SQLConf)
-  }
-
-  def newHadoopConf(): Configuration = {
-    val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
-    conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
-    hadoopConf
-  }
-
-  def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
-    val hadoopConf = newHadoopConf()
-    options.foreach { case (k, v) =>
-      if ((v ne null) && k != "path" && k != "paths") {
-        hadoopConf.set(k, v)
-      }
-    }
-    hadoopConf
-  }
-
-  val experimentalMethods: ExperimentalMethods = {
-    parentSessionState
-      .map(_.experimentalMethods.copy)
-      .getOrElse(new ExperimentalMethods)
-  }
-
-  /**
-   * Internal catalog for managing functions registered by the user.
-   */
-  val functionRegistry: FunctionRegistry = {
-    parentSessionState.map(_.functionRegistry.copy).getOrElse(FunctionRegistry.builtin.copy)
-  }
-
-  /**
-   * A class for loading resources specified by a function.
-   */
-  val functionResourceLoader: FunctionResourceLoader = {
-    new FunctionResourceLoader {
-      override def loadResource(resource: FunctionResource): Unit = {
-        resource.resourceType match {
-          case JarResource => addJar(resource.uri)
-          case FileResource => sparkSession.sparkContext.addFile(resource.uri)
-          case ArchiveResource =>
-            throw new AnalysisException(
-              "Archive is not allowed to be loaded. If YARN mode is used, " +
-                "please use --archives options while calling spark-submit.")
-        }
-      }
-    }
-  }
-
-  /**
-   * Internal catalog for managing table and database states.
-   */
-  val catalog: SessionCatalog = {
-    parentSessionState
-      .map(_.catalog.copy)
-      .getOrElse(new SessionCatalog(
-        sparkSession.sharedState.externalCatalog,
-        sparkSession.sharedState.globalTempViewManager,
-        functionResourceLoader,
-        functionRegistry,
-        conf,
-        newHadoopConf(),
-        sqlParser))
-  }
-
-  /**
+  /*
    * Interface exposed to the user for registering user-defined functions.
    * Note that the user-defined functions must be deterministic.
    */
   val udf: UDFRegistration = new UDFRegistration(functionRegistry)
 
-  /**
-   * Logical query plan analyzer for resolving unresolved attributes and relations.
-   */
+  // Logical query plan analyzer for resolving unresolved attributes and relations.
   val analyzer: Analyzer = {
     new Analyzer(catalog, conf) {
       override val extendedResolutionRules =
@@ -147,37 +70,17 @@ private[sql] class SessionState(
     }
   }
 
-  /**
-   * Logical query plan optimizer.
-   */
+  // Logical query plan optimizer.
   val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
-  /**
-   * Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
-   */
-  val sqlParser: ParserInterface = new SparkSqlParser(conf)
-
-  /**
-   * Planner that converts optimized logical plans to physical plans.
-   */
-  def planner: SparkPlanner =
-    new SparkPlanner(sparkSession.sparkContext, conf, experimentalMethods.extraStrategies)
-
-  /**
+  /*
    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
    * that listen for execution metrics.
    */
   val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
 
-  /**
-   * Interface to start and stop [[StreamingQuery]]s.
-   */
-  val streamingQueryManager: StreamingQueryManager = {
-    new StreamingQueryManager(sparkSession)
-  }
-
-  private val jarClassLoader: NonClosableMutableURLClassLoader =
-    sparkSession.sharedState.jarClassLoader
+  // Interface to start and stop [[StreamingQuery]]s.
+  val streamingQueryManager: StreamingQueryManager = new StreamingQueryManager(sparkSession)
 
   // Automatically extract all entries and put it in our SQLConf
   // We need to call it after all of vals have been initialized.
@@ -185,11 +88,38 @@ private[sql] class SessionState(
     conf.setConfString(k, v)
   }
 
+  /**
+   * Planner that converts optimized logical plans to physical plans.
+   */
+  def planner: SparkPlanner =
+    new SparkPlanner(sparkSession.sparkContext, conf, experimentalMethods.extraStrategies)
+
+  def newHadoopConf(): Configuration = SessionState.newHadoopConf(sparkSession, conf)
+
+  def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
+    val hadoopConf = newHadoopConf()
+    options.foreach { case (k, v) =>
+      if ((v ne null) && k != "path" && k != "paths") {
+        hadoopConf.set(k, v)
+      }
+    }
+    hadoopConf
+  }
+
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
   def copy(associatedSparkSession: SparkSession): SessionState = {
-    new SessionState(associatedSparkSession, Some(this))
+    val sqlConf = conf.copy
+    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
+
+    new SessionState(
+      sparkSession,
+      sqlConf,
+      experimentalMethods.copy,
+      functionRegistry.copy,
+      catalog.copy,
+      sqlParser)
   }
 
   // ------------------------------------------------------
@@ -202,7 +132,89 @@ private[sql] class SessionState(
     catalog.refreshTable(sqlParser.parseTableIdentifier(tableName))
   }
 
-  def addJar(path: String): Unit = {
+  private val jarClassLoader: NonClosableMutableURLClassLoader =
+    sparkSession.sharedState.jarClassLoader
+
+  def addJar(path: String): Unit = SessionState.addJar(sparkSession, path, jarClassLoader)
+
+  /**
+   * Analyzes the given table in the current database to generate statistics, which will be
+   * used in query optimizations.
+   */
+  def analyze(tableIdent: TableIdentifier, noscan: Boolean = true): Unit = {
+    AnalyzeTableCommand(tableIdent, noscan).run(sparkSession)
+  }
+
+}
+
+
+object SessionState {
+
+  def apply(sparkSession: SparkSession): SessionState = {
+    apply(sparkSession, None)
+  }
+
+  def apply(
+      sparkSession: SparkSession,
+      conf: Option[SQLConf]): SessionState = {
+
+    // SQL-specific key-value configurations.
+    val sqlConf = conf.getOrElse(new SQLConf)
+
+    // Internal catalog for managing functions registered by the user.
+    val functionRegistry = FunctionRegistry.builtin.copy
+
+    val jarClassLoader: NonClosableMutableURLClassLoader = sparkSession.sharedState.jarClassLoader
+
+    // A class for loading resources specified by a function.
+    val functionResourceLoader: FunctionResourceLoader =
+      createFunctionResourceLoader(sparkSession, jarClassLoader)
+
+    // Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
+    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
+
+    // Internal catalog for managing table and database states.
+    val catalog = new SessionCatalog(
+      sparkSession.sharedState.externalCatalog,
+      sparkSession.sharedState.globalTempViewManager,
+      functionResourceLoader,
+      functionRegistry,
+      sqlConf,
+      newHadoopConf(sparkSession, sqlConf),
+      sqlParser)
+
+    new SessionState(
+      sparkSession,
+      sqlConf,
+      new ExperimentalMethods,
+      functionRegistry,
+      catalog,
+      sqlParser)
+  }
+
+  def createFunctionResourceLoader(
+      sparkSession: SparkSession,
+      jarClassLoader: NonClosableMutableURLClassLoader): FunctionResourceLoader = {
+
+    new FunctionResourceLoader {
+      override def loadResource(resource: FunctionResource): Unit = {
+        resource.resourceType match {
+          case JarResource => addJar(sparkSession, resource.uri, jarClassLoader)
+          case FileResource => sparkSession.sparkContext.addFile(resource.uri)
+          case ArchiveResource =>
+            throw new AnalysisException(
+              "Archive is not allowed to be loaded. If YARN mode is used, " +
+                "please use --archives options while calling spark-submit.")
+        }
+      }
+    }
+  }
+
+  def addJar(
+      sparkSession: SparkSession,
+      path: String,
+      jarClassLoader: NonClosableMutableURLClassLoader): Unit = {
+
     sparkSession.sparkContext.addJar(path)
 
     val uri = new Path(path).toUri
@@ -217,11 +229,10 @@ private[sql] class SessionState(
     Thread.currentThread().setContextClassLoader(jarClassLoader)
   }
 
-  /**
-   * Analyzes the given table in the current database to generate statistics, which will be
-   * used in query optimizations.
-   */
-  def analyze(tableIdent: TableIdentifier, noscan: Boolean = true): Unit = {
-    AnalyzeTableCommand(tableIdent, noscan).run(sparkSession)
+  def newHadoopConf(sparkSession: SparkSession, conf: SQLConf): Configuration = {
+    val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
+    conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
+    hadoopConf
   }
+
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index ca619333674be..da00320d84877 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -18,7 +18,10 @@
 package org.apache.spark.sql.test
 
 import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.{ExperimentalMethods, SparkSession}
+import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog
+import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.internal.{SessionState, SQLConf}
 
 /**
@@ -34,8 +37,24 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
     this(new SparkConf)
   }
 
-  class TestSessionState(sparkSession: SparkSession) extends {
-    override val conf: SQLConf = {
+  class TestSessionState(
+      sparkSession: SparkSession,
+      conf: SQLConf,
+      experimentalMethods: ExperimentalMethods,
+      functionRegistry: FunctionRegistry,
+      catalog: SessionCatalog,
+      sqlParser: ParserInterface)
+    extends SessionState(
+        sparkSession,
+        conf,
+        experimentalMethods,
+        functionRegistry,
+        catalog,
+        sqlParser) {}
+
+  object TestSessionState {
+
+    def makeTestConf: SQLConf = {
       new SQLConf {
         clear()
         override def clear(): Unit = {
@@ -45,10 +64,24 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
         }
       }
     }
-  } with SessionState(sparkSession)
+
+    def apply(sparkSession: SparkSession): TestSessionState = {
+      val conf = makeTestConf
+      val copyHelper = SessionState(sparkSession, Some(conf))
+
+      new TestSessionState(
+        sparkSession,
+        conf,
+        copyHelper.experimentalMethods,
+        copyHelper.functionRegistry,
+        copyHelper.catalog,
+        copyHelper.sqlParser
+      )
+    }
+  }
 
   @transient
-  protected[sql] override lazy val sessionState: SessionState = new TestSessionState(this)
+  protected[sql] override lazy val sessionState: SessionState = TestSessionState(this)
 
   // Needed for Java tests
   def loadTestData(): Unit = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index faa76b73fde4b..c6624e01e5a1f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -38,8 +38,8 @@ import org.apache.spark.sql.types._
  * cleaned up to integrate more nicely with [[HiveExternalCatalog]].
  */
 private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Logging {
-  private val sessionState = sparkSession.sessionState.asInstanceOf[HiveSessionState]
-  private lazy val tableRelationCache = sparkSession.sessionState.catalog.tableRelationCache
+  private def sessionState = sparkSession.sessionState.asInstanceOf[HiveSessionState]
+  private def tableRelationCache = sparkSession.sessionState.catalog.tableRelationCache
 
   private def getCurrentDatabase: String = sessionState.catalog.getCurrentDatabase
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 44ef5cce2ee05..d05781918e5a1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -75,6 +75,24 @@ private[sql] class HiveSessionCatalog(
     metastoreCatalog.hiveDefaultTableFilePath(name)
   }
 
+  def copy(associatedSparkSession: SparkSession): HiveSessionCatalog = {
+    val catalog = new HiveSessionCatalog(
+      externalCatalog,
+      globalTempViewManager,
+      associatedSparkSession,
+      functionResourceLoader,
+      functionRegistry,
+      conf,
+      hadoopConf,
+      parser)
+
+    catalog.currentDb = currentDb
+    // copy over temporary tables
+    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
+
+    catalog
+  }
+
   // For testing only
   private[hive] def getCachedDataSourceTable(table: TableIdentifier): LogicalPlan = {
     val key = metastoreCatalog.getQualifiedTableName(table)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 5a4057173dfc6..4f909ca15e385 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -18,11 +18,13 @@
 package org.apache.spark.sql.hive
 
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.analysis.Analyzer
-import org.apache.spark.sql.execution.SparkPlanner
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
+import org.apache.spark.sql.catalyst.catalog._
+import org.apache.spark.sql.catalyst.parser.ParserInterface
+import org.apache.spark.sql.execution.{SparkPlanner, SparkSqlParser}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
-import org.apache.spark.sql.internal.SessionState
+import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SessionState, SQLConf}
 
 
 /**
@@ -30,38 +32,22 @@ import org.apache.spark.sql.internal.SessionState
  */
 private[hive] class HiveSessionState(
     sparkSession: SparkSession,
-    parentHiveSessionState: Option[HiveSessionState])
-  extends SessionState(sparkSession, parentHiveSessionState) {
+    conf: SQLConf,
+    experimentalMethods: ExperimentalMethods,
+    functionRegistry: FunctionRegistry,
+    override val catalog: HiveSessionCatalog,
+    sqlParser: ParserInterface,
+    val metadataHive: HiveClient)
+  extends SessionState(
+    sparkSession,
+    conf,
+    experimentalMethods,
+    functionRegistry,
+    catalog,
+    sqlParser) {
 
   self =>
 
-  private[hive] def this(associatedSparkSession: SparkSession) = {
-    this(associatedSparkSession, None)
-  }
-
-  /**
-   * A Hive client used for interacting with the metastore.
-   */
-  val metadataHive: HiveClient =
-    parentHiveSessionState.map(_.metadataHive).getOrElse(
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
-        .newSession())
-
-  /**
-   * Internal catalog for managing table and database states.
-   */
-  override val catalog = {
-    new HiveSessionCatalog(
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
-      sparkSession.sharedState.globalTempViewManager,
-      sparkSession,
-      functionResourceLoader,
-      functionRegistry,
-      conf,
-      newHadoopConf(),
-      sqlParser)
-  }
-
   /**
    * An analyzer that uses the Hive metastore.
    */
@@ -156,7 +142,65 @@ private[hive] class HiveSessionState(
   }
 
   override def copy(associatedSparkSession: SparkSession): HiveSessionState = {
-    new HiveSessionState(associatedSparkSession, Some(this.asInstanceOf[HiveSessionState]))
+    val sqlParser: ParserInterface = new SparkSqlParser(conf)
+
+    new HiveSessionState(
+      associatedSparkSession,
+      conf.copy,
+      experimentalMethods.copy,
+      functionRegistry.copy,
+      catalog.copy(associatedSparkSession),
+      sqlParser,
+      metadataHive)
+  }
+
+}
+
+object HiveSessionState {
+
+  def apply(sparkSession: SparkSession): HiveSessionState = {
+    apply(sparkSession, None)
+  }
+
+  def apply(
+     sparkSession: SparkSession,
+     conf: Option[SQLConf]): HiveSessionState = {
+
+    val sqlConf = conf.getOrElse(new SQLConf)
+
+    val functionRegistry = FunctionRegistry.builtin.copy
+
+    val jarClassLoader: NonClosableMutableURLClassLoader = sparkSession.sharedState.jarClassLoader
+
+    val functionResourceLoader: FunctionResourceLoader =
+      SessionState.createFunctionResourceLoader(sparkSession, jarClassLoader)
+
+    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
+
+    val catalog = new HiveSessionCatalog(
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
+      sparkSession.sharedState.globalTempViewManager,
+      sparkSession,
+      functionResourceLoader,
+      functionRegistry,
+      sqlConf,
+      SessionState.newHadoopConf(sparkSession, sqlConf),
+      sqlParser)
+
+
+    // A Hive client used for interacting with the metastore.
+    val metadataHive: HiveClient =
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+        .newSession()
+
+    new HiveSessionState(
+      sparkSession,
+      sqlConf,
+      new ExperimentalMethods,
+      functionRegistry,
+      catalog,
+      sqlParser,
+      metadataHive)
   }
 
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index c48f5e464944e..23120bd595026 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -30,12 +30,14 @@ import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.catalyst.analysis._
+import org.apache.spark.sql.{ExperimentalMethods, SparkSession, SQLContext}
+import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.QueryExecution
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
+import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.{SharedState, SQLConf}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.util.{ShutdownHookManager, Utils}
@@ -145,8 +147,7 @@ private[hive] class TestHiveSparkSession(
   // TODO: Let's remove TestHiveSessionState. Otherwise, we are not really testing the reflection
   // logic based on the setting of CATALOG_IMPLEMENTATION.
   @transient
-  override lazy val sessionState: TestHiveSessionState =
-    new TestHiveSessionState(self)
+  override lazy val sessionState: TestHiveSessionState = TestHiveSessionState(self)
 
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
@@ -491,10 +492,30 @@ private[hive] class TestHiveQueryExecution(
 }
 
 private[hive] class TestHiveSessionState(
-    sparkSession: TestHiveSparkSession)
-  extends HiveSessionState(sparkSession) { self =>
+    sparkSession: TestHiveSparkSession,
+    conf: SQLConf,
+    experimentalMethods: ExperimentalMethods,
+    functionRegistry: org.apache.spark.sql.catalyst.analysis.FunctionRegistry,
+    catalog: HiveSessionCatalog,
+    sqlParser: ParserInterface,
+    metadataHive: HiveClient)
+  extends HiveSessionState(
+      sparkSession,
+      conf,
+      experimentalMethods,
+      functionRegistry,
+      catalog,
+      sqlParser,
+      metadataHive) {
+
+  override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
+    new TestHiveQueryExecution(sparkSession, plan)
+  }
+}
+
+private[hive] object TestHiveSessionState {
 
-  override lazy val conf: SQLConf = {
+  def makeTestConf: SQLConf = {
     new SQLConf {
       clear()
       override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
@@ -505,9 +526,20 @@ private[hive] class TestHiveSessionState(
     }
   }
 
-  override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
-    new TestHiveQueryExecution(sparkSession, plan)
+  def apply(sparkSession: TestHiveSparkSession): TestHiveSessionState = {
+    val sqlConf = makeTestConf
+    val copyHelper = HiveSessionState(sparkSession, Some(sqlConf))
+
+    new TestHiveSessionState(
+      sparkSession,
+      sqlConf,
+      copyHelper.experimentalMethods,
+      copyHelper.functionRegistry,
+      copyHelper.catalog,
+      copyHelper.sqlParser,
+      copyHelper.metadataHive)
   }
+
 }
 
 

From 8c003449a910a5909fd0b80509ef7b3ef96d698b Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 15 Feb 2017 16:36:54 -0800
Subject: [PATCH 08/31] Remove unused import.

---
 .../main/scala/org/apache/spark/sql/ExperimentalMethods.scala   | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 3909e2f1a47f2..3d846c7bbf256 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql
 
-import scala.collection.mutable.ListBuffer
-
 import org.apache.spark.annotation.{Experimental, InterfaceStability}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule

From f423f7481348c021d9a27986064dfbe389c5de77 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 16 Feb 2017 15:38:52 -0800
Subject: [PATCH 09/31] Remove SparkSession reference from SessionState.

---
 .../spark/sql/internal/SessionState.scala     | 145 ++++++------
 .../spark/sql/test/TestSQLContext.scala       |  45 ++--
 .../spark/sql/hive/HiveSessionState.scala     | 212 +++++++++++-------
 .../apache/spark/sql/hive/test/TestHive.scala |  55 +++--
 4 files changed, 280 insertions(+), 177 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 2148ba45d85e6..4ab2fa4e65c0c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -22,17 +22,16 @@ import java.io.File
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.SparkContext
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution._
-import org.apache.spark.sql.execution.command.AnalyzeTableCommand
 import org.apache.spark.sql.execution.datasources._
-import org.apache.spark.sql.streaming.{StreamingQueryManager}
+import org.apache.spark.sql.streaming.StreamingQueryManager
 import org.apache.spark.sql.util.ExecutionListenerManager
 
 
@@ -40,12 +39,16 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  * A class that holds all session-specific state in a given [[SparkSession]].
  */
 private[sql] class SessionState(
-    sparkSession: SparkSession,
+    sparkContext: SparkContext,
     val conf: SQLConf,
     val experimentalMethods: ExperimentalMethods,
     val functionRegistry: FunctionRegistry,
     val catalog: SessionCatalog,
-    val sqlParser: ParserInterface) {
+    val sqlParser: ParserInterface,
+    val analyzer: Analyzer,
+    val streamingQueryManager: StreamingQueryManager,
+    val queryExecutionCreator: LogicalPlan => QueryExecution,
+    val jarClassLoader: NonClosableMutableURLClassLoader) {
 
   /*
    * Interface exposed to the user for registering user-defined functions.
@@ -53,23 +56,6 @@ private[sql] class SessionState(
    */
   val udf: UDFRegistration = new UDFRegistration(functionRegistry)
 
-  // Logical query plan analyzer for resolving unresolved attributes and relations.
-  val analyzer: Analyzer = {
-    new Analyzer(catalog, conf) {
-      override val extendedResolutionRules =
-        new FindDataSourceTable(sparkSession) ::
-        new ResolveDataSource(sparkSession) :: Nil
-
-      override val postHocResolutionRules =
-        AnalyzeCreateTable(sparkSession) ::
-        PreprocessTableInsertion(conf) ::
-        DataSourceAnalysis(conf) :: Nil
-
-      override val extendedCheckRules =
-        Seq(PreWriteCheck(conf, catalog), HiveOnlyCheck)
-    }
-  }
-
   // Logical query plan optimizer.
   val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
@@ -79,22 +65,15 @@ private[sql] class SessionState(
    */
   val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
 
-  // Interface to start and stop [[StreamingQuery]]s.
-  val streamingQueryManager: StreamingQueryManager = new StreamingQueryManager(sparkSession)
-
-  // Automatically extract all entries and put it in our SQLConf
-  // We need to call it after all of vals have been initialized.
-  sparkSession.sparkContext.getConf.getAll.foreach { case (k, v) =>
-    conf.setConfString(k, v)
-  }
-
   /**
    * Planner that converts optimized logical plans to physical plans.
    */
   def planner: SparkPlanner =
-    new SparkPlanner(sparkSession.sparkContext, conf, experimentalMethods.extraStrategies)
+    new SparkPlanner(sparkContext, conf, experimentalMethods.extraStrategies)
 
-  def newHadoopConf(): Configuration = SessionState.newHadoopConf(sparkSession, conf)
+  def newHadoopConf(): Configuration = SessionState.newHadoopConf(
+    sparkContext.hadoopConfiguration,
+    conf)
 
   def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
     val hadoopConf = newHadoopConf()
@@ -111,39 +90,38 @@ private[sql] class SessionState(
    */
   def copy(associatedSparkSession: SparkSession): SessionState = {
     val sqlConf = conf.copy
+    val catalogCopy = catalog.copy
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
+    val queryExecution = (plan: LogicalPlan) => new QueryExecution(associatedSparkSession, plan)
+
+    associatedSparkSession.sparkContext.getConf.getAll.foreach { case (k, v) =>
+      sqlConf.setConfString(k, v)
+    }
 
     new SessionState(
-      sparkSession,
+      associatedSparkSession.sparkContext,
       sqlConf,
       experimentalMethods.copy,
       functionRegistry.copy,
-      catalog.copy,
-      sqlParser)
+      catalogCopy,
+      sqlParser,
+      SessionState.createAnalyzer(associatedSparkSession, catalogCopy, sqlConf),
+      new StreamingQueryManager(associatedSparkSession),
+      queryExecution,
+      jarClassLoader)
   }
 
   // ------------------------------------------------------
   //  Helper methods, partially leftover from pre-2.0 days
   // ------------------------------------------------------
 
-  def executePlan(plan: LogicalPlan): QueryExecution = new QueryExecution(sparkSession, plan)
+  def executePlan(plan: LogicalPlan): QueryExecution = queryExecutionCreator(plan)
 
   def refreshTable(tableName: String): Unit = {
     catalog.refreshTable(sqlParser.parseTableIdentifier(tableName))
   }
 
-  private val jarClassLoader: NonClosableMutableURLClassLoader =
-    sparkSession.sharedState.jarClassLoader
-
-  def addJar(path: String): Unit = SessionState.addJar(sparkSession, path, jarClassLoader)
-
-  /**
-   * Analyzes the given table in the current database to generate statistics, which will be
-   * used in query optimizations.
-   */
-  def analyze(tableIdent: TableIdentifier, noscan: Boolean = true): Unit = {
-    AnalyzeTableCommand(tableIdent, noscan).run(sparkSession)
-  }
+  def addJar(path: String): Unit = SessionState.addJar(sparkContext, path, jarClassLoader)
 
 }
 
@@ -158,9 +136,16 @@ object SessionState {
       sparkSession: SparkSession,
       conf: Option[SQLConf]): SessionState = {
 
+    val sparkContext = sparkSession.sparkContext
+
     // SQL-specific key-value configurations.
     val sqlConf = conf.getOrElse(new SQLConf)
 
+    // Automatically extract all entries and put them in our SQLConf
+    sparkContext.getConf.getAll.foreach { case (k, v) =>
+      sqlConf.setConfString(k, v)
+    }
+
     // Internal catalog for managing functions registered by the user.
     val functionRegistry = FunctionRegistry.builtin.copy
 
@@ -168,7 +153,7 @@ object SessionState {
 
     // A class for loading resources specified by a function.
     val functionResourceLoader: FunctionResourceLoader =
-      createFunctionResourceLoader(sparkSession, jarClassLoader)
+      createFunctionResourceLoader(sparkContext, jarClassLoader)
 
     // Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
@@ -180,27 +165,39 @@ object SessionState {
       functionResourceLoader,
       functionRegistry,
       sqlConf,
-      newHadoopConf(sparkSession, sqlConf),
+      newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
       sqlParser)
 
+    // Logical query plan analyzer for resolving unresolved attributes and relations.
+    val analyzer: Analyzer = createAnalyzer(sparkSession, catalog, sqlConf)
+
+    // Interface to start and stop [[StreamingQuery]]s.
+    val streamingQueryManager: StreamingQueryManager = new StreamingQueryManager(sparkSession)
+
+    val queryExecution = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
+
     new SessionState(
-      sparkSession,
+      sparkContext,
       sqlConf,
       new ExperimentalMethods,
       functionRegistry,
       catalog,
-      sqlParser)
+      sqlParser,
+      analyzer,
+      streamingQueryManager,
+      queryExecution,
+      jarClassLoader)
   }
 
   def createFunctionResourceLoader(
-      sparkSession: SparkSession,
+      sparkContext: SparkContext,
       jarClassLoader: NonClosableMutableURLClassLoader): FunctionResourceLoader = {
 
     new FunctionResourceLoader {
       override def loadResource(resource: FunctionResource): Unit = {
         resource.resourceType match {
-          case JarResource => addJar(sparkSession, resource.uri, jarClassLoader)
-          case FileResource => sparkSession.sparkContext.addFile(resource.uri)
+          case JarResource => addJar(sparkContext, resource.uri, jarClassLoader)
+          case FileResource => sparkContext.addFile(resource.uri)
           case ArchiveResource =>
             throw new AnalysisException(
               "Archive is not allowed to be loaded. If YARN mode is used, " +
@@ -210,12 +207,38 @@ object SessionState {
     }
   }
 
-  def addJar(
+  def newHadoopConf(copyHadoopConf: Configuration, sqlConf: SQLConf): Configuration = {
+    val hadoopConf = new Configuration(copyHadoopConf)
+    sqlConf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
+    hadoopConf
+  }
+
+  def createAnalyzer(
       sparkSession: SparkSession,
+      catalog: SessionCatalog,
+      sqlConf: SQLConf): Analyzer = {
+
+    new Analyzer(catalog, sqlConf) {
+      override val extendedResolutionRules =
+        new FindDataSourceTable(sparkSession) ::
+        new ResolveDataSource(sparkSession) :: Nil
+
+      override val postHocResolutionRules =
+        AnalyzeCreateTable(sparkSession) ::
+        PreprocessTableInsertion(sqlConf) ::
+        DataSourceAnalysis(sqlConf) :: Nil
+
+      override val extendedCheckRules =
+        Seq(PreWriteCheck(sqlConf, catalog), HiveOnlyCheck)
+    }
+  }
+
+  def addJar(
+      sparkContext: SparkContext,
       path: String,
       jarClassLoader: NonClosableMutableURLClassLoader): Unit = {
 
-    sparkSession.sparkContext.addJar(path)
+    sparkContext.addJar(path)
 
     val uri = new Path(path).toUri
     val jarURL = if (uri.getScheme == null) {
@@ -229,10 +252,4 @@ object SessionState {
     Thread.currentThread().setContextClassLoader(jarClassLoader)
   }
 
-  def newHadoopConf(sparkSession: SparkSession, conf: SQLConf): Configuration = {
-    val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
-    conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
-    hadoopConf
-  }
-
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index da00320d84877..3b17d18ea5be7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -19,10 +19,13 @@ package org.apache.spark.sql.test
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql.{ExperimentalMethods, SparkSession}
-import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.internal.{SessionState, SQLConf}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.QueryExecution
+import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SessionState, SQLConf}
+import org.apache.spark.sql.streaming.StreamingQueryManager
 
 /**
  * A special [[SparkSession]] prepared for testing.
@@ -38,23 +41,31 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
   }
 
   class TestSessionState(
-      sparkSession: SparkSession,
+      sparkContext: SparkContext,
       conf: SQLConf,
       experimentalMethods: ExperimentalMethods,
       functionRegistry: FunctionRegistry,
       catalog: SessionCatalog,
-      sqlParser: ParserInterface)
+      sqlParser: ParserInterface,
+      analyzer: Analyzer,
+      streamingQueryManager: StreamingQueryManager,
+      queryExecution: LogicalPlan => QueryExecution,
+      jarClassLoader: NonClosableMutableURLClassLoader)
     extends SessionState(
-        sparkSession,
+        sparkContext,
         conf,
         experimentalMethods,
         functionRegistry,
         catalog,
-        sqlParser) {}
+        sqlParser,
+        analyzer,
+        streamingQueryManager,
+        queryExecution,
+        jarClassLoader) {}
 
   object TestSessionState {
 
-    def makeTestConf: SQLConf = {
+    def createTestConf: SQLConf = {
       new SQLConf {
         clear()
         override def clear(): Unit = {
@@ -66,16 +77,20 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
     }
 
     def apply(sparkSession: SparkSession): TestSessionState = {
-      val conf = makeTestConf
-      val copyHelper = SessionState(sparkSession, Some(conf))
+      val sqlConf = createTestConf
+      val initHelper = SessionState(sparkSession, Some(sqlConf))
 
       new TestSessionState(
-        sparkSession,
-        conf,
-        copyHelper.experimentalMethods,
-        copyHelper.functionRegistry,
-        copyHelper.catalog,
-        copyHelper.sqlParser
+        sparkSession.sparkContext,
+        sqlConf,
+        initHelper.experimentalMethods,
+        initHelper.functionRegistry,
+        initHelper.catalog,
+        initHelper.sqlParser,
+        initHelper.analyzer,
+        initHelper.streamingQueryManager,
+        initHelper.queryExecutionCreator,
+        initHelper.jarClassLoader
       )
     }
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 4f909ca15e385..5f5952a133cf9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -17,85 +17,51 @@
 
 package org.apache.spark.sql.hive
 
+import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.execution.{SparkPlanner, SparkSqlParser}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner, SparkSqlParser}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SessionState, SQLConf}
+import org.apache.spark.sql.streaming.StreamingQueryManager
 
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]] backed by Hive.
  */
 private[hive] class HiveSessionState(
-    sparkSession: SparkSession,
+    sparkContext: SparkContext,
     conf: SQLConf,
     experimentalMethods: ExperimentalMethods,
     functionRegistry: FunctionRegistry,
     override val catalog: HiveSessionCatalog,
     sqlParser: ParserInterface,
-    val metadataHive: HiveClient)
+    val metadataHive: HiveClient,
+    override val analyzer: Analyzer,
+    streamingQueryManager: StreamingQueryManager,
+    queryExecutionCreator: LogicalPlan => QueryExecution,
+    jarClassLoader: NonClosableMutableURLClassLoader,
+    val plannerCreator: () => SparkPlanner)
   extends SessionState(
-    sparkSession,
-    conf,
-    experimentalMethods,
-    functionRegistry,
-    catalog,
-    sqlParser) {
-
-  self =>
-
-  /**
-   * An analyzer that uses the Hive metastore.
-   */
-  override val analyzer: Analyzer = {
-    new Analyzer(catalog, conf) {
-      override val extendedResolutionRules =
-        catalog.ParquetConversions ::
-        catalog.OrcConversions ::
-        new DetermineHiveSerde(conf) ::
-        new FindDataSourceTable(sparkSession) ::
-        new FindHiveSerdeTable(sparkSession) ::
-        new ResolveDataSource(sparkSession) :: Nil
-
-      override val postHocResolutionRules =
-        AnalyzeCreateTable(sparkSession) ::
-        PreprocessTableInsertion(conf) ::
-        DataSourceAnalysis(conf) ::
-        new HiveAnalysis(sparkSession) :: Nil
-
-      override val extendedCheckRules = Seq(PreWriteCheck(conf, catalog))
-    }
-  }
+      sparkContext,
+      conf,
+      experimentalMethods,
+      functionRegistry,
+      catalog,
+      sqlParser,
+      analyzer,
+      streamingQueryManager,
+      queryExecutionCreator,
+      jarClassLoader) { self =>
 
   /**
    * Planner that takes into account Hive-specific strategies.
    */
-  override def planner: SparkPlanner = {
-    new SparkPlanner(sparkSession.sparkContext, conf, experimentalMethods.extraStrategies)
-      with HiveStrategies {
-      override val sparkSession: SparkSession = self.sparkSession
-
-      override def strategies: Seq[Strategy] = {
-        experimentalMethods.extraStrategies ++ Seq(
-          FileSourceStrategy,
-          DataSourceStrategy,
-          DDLStrategy,
-          SpecialLimits,
-          InMemoryScans,
-          HiveTableScans,
-          Scripts,
-          Aggregation,
-          JoinSelection,
-          BasicOperators
-        )
-      }
-    }
-  }
-
+  override def planner: SparkPlanner = plannerCreator()
 
   // ------------------------------------------------------
   //  Helper methods, partially leftover from pre-2.0 days
@@ -142,16 +108,29 @@ private[hive] class HiveSessionState(
   }
 
   override def copy(associatedSparkSession: SparkSession): HiveSessionState = {
-    val sqlParser: ParserInterface = new SparkSqlParser(conf)
+    val sqlConf = conf.copy
+    val copyHelper = SessionState(associatedSparkSession, Some(sqlConf))
+    val catalogCopy = catalog.copy(associatedSparkSession)
+    val hiveClient =
+      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+        .newSession()
 
     new HiveSessionState(
-      associatedSparkSession,
-      conf.copy,
-      experimentalMethods.copy,
-      functionRegistry.copy,
-      catalog.copy(associatedSparkSession),
-      sqlParser,
-      metadataHive)
+      associatedSparkSession.sparkContext,
+      sqlConf,
+      copyHelper.experimentalMethods,
+      copyHelper.functionRegistry,
+      catalogCopy,
+      copyHelper.sqlParser,
+      hiveClient,
+      HiveSessionState.createAnalyzer(associatedSparkSession, catalogCopy, sqlConf),
+      copyHelper.streamingQueryManager,
+      copyHelper.queryExecutionCreator,
+      jarClassLoader,
+      HiveSessionState.createPlannerCreator(
+        associatedSparkSession,
+        sqlConf,
+        copyHelper.experimentalMethods))
   }
 
 }
@@ -163,44 +142,123 @@ object HiveSessionState {
   }
 
   def apply(
-     sparkSession: SparkSession,
-     conf: Option[SQLConf]): HiveSessionState = {
+      associatedSparkSession: SparkSession,
+      conf: Option[SQLConf]): HiveSessionState = {
+
+    val sparkContext = associatedSparkSession.sparkContext
 
     val sqlConf = conf.getOrElse(new SQLConf)
 
+    sparkContext.getConf.getAll.foreach { case (k, v) =>
+      sqlConf.setConfString(k, v)
+    }
+
     val functionRegistry = FunctionRegistry.builtin.copy
 
-    val jarClassLoader: NonClosableMutableURLClassLoader = sparkSession.sharedState.jarClassLoader
+    val experimentalMethods = new ExperimentalMethods
+
+    val jarClassLoader: NonClosableMutableURLClassLoader =
+      associatedSparkSession.sharedState.jarClassLoader
 
     val functionResourceLoader: FunctionResourceLoader =
-      SessionState.createFunctionResourceLoader(sparkSession, jarClassLoader)
+      SessionState.createFunctionResourceLoader(sparkContext, jarClassLoader)
 
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
 
     val catalog = new HiveSessionCatalog(
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
-      sparkSession.sharedState.globalTempViewManager,
-      sparkSession,
+      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
+      associatedSparkSession.sharedState.globalTempViewManager,
+      associatedSparkSession,
       functionResourceLoader,
       functionRegistry,
       sqlConf,
-      SessionState.newHadoopConf(sparkSession, sqlConf),
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
       sqlParser)
 
-
     // A Hive client used for interacting with the metastore.
     val metadataHive: HiveClient =
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
+    // An analyzer that uses the Hive metastore.
+    val analyzer: Analyzer = createAnalyzer(associatedSparkSession, catalog, sqlConf)
+
+    val streamingQueryManager: StreamingQueryManager =
+      new StreamingQueryManager(associatedSparkSession)
+
+    val queryExecutionCreator = (plan: LogicalPlan) =>
+      new QueryExecution(associatedSparkSession, plan)
+
+    val plannerCreator = createPlannerCreator(associatedSparkSession, sqlConf, experimentalMethods)
+
     new HiveSessionState(
-      sparkSession,
+      sparkContext,
       sqlConf,
-      new ExperimentalMethods,
+      experimentalMethods,
       functionRegistry,
       catalog,
       sqlParser,
-      metadataHive)
+      metadataHive,
+      analyzer,
+      streamingQueryManager,
+      queryExecutionCreator,
+      jarClassLoader,
+      plannerCreator)
+  }
+
+  def createAnalyzer(
+      sparkSession: SparkSession,
+      catalog: HiveSessionCatalog,
+      sqlConf: SQLConf): Analyzer = {
+
+    new Analyzer(catalog, sqlConf) {
+      override val extendedResolutionRules =
+        catalog.ParquetConversions ::
+          catalog.OrcConversions ::
+          new DetermineHiveSerde(sqlConf) ::
+          new FindDataSourceTable(sparkSession) ::
+          new FindHiveSerdeTable(sparkSession) ::
+          new ResolveDataSource(sparkSession) :: Nil
+
+      override val postHocResolutionRules =
+        AnalyzeCreateTable(sparkSession) ::
+          PreprocessTableInsertion(sqlConf) ::
+          DataSourceAnalysis(sqlConf) ::
+          new HiveAnalysis(sparkSession) :: Nil
+
+      override val extendedCheckRules = Seq(PreWriteCheck(sqlConf, catalog))
+    }
+  }
+
+  def createPlannerCreator(
+      associatedSparkSession: SparkSession,
+      sqlConf: SQLConf,
+      experimentalMethods: ExperimentalMethods): () => SparkPlanner = {
+
+    () =>
+      new SparkPlanner(
+          associatedSparkSession.sparkContext,
+          sqlConf,
+          experimentalMethods.extraStrategies)
+        with HiveStrategies {
+
+        override val sparkSession: SparkSession = associatedSparkSession
+
+        override def strategies: Seq[Strategy] = {
+          experimentalMethods.extraStrategies ++ Seq(
+            FileSourceStrategy,
+            DataSourceStrategy,
+            DDLStrategy,
+            SpecialLimits,
+            InMemoryScans,
+            HiveTableScans,
+            Scripts,
+            Aggregation,
+            JoinSelection,
+            BasicOperators
+          )
+        }
+      }
   }
 
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 23120bd595026..0a0bba8b7c5c2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -31,15 +31,16 @@ import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.{ExperimentalMethods, SparkSession, SQLContext}
-import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
+import org.apache.spark.sql.catalyst.analysis.{Analyzer, UnresolvedRelation}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.QueryExecution
+import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner}
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.client.HiveClient
-import org.apache.spark.sql.internal.{SharedState, SQLConf}
+import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SharedState, SQLConf}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
+import org.apache.spark.sql.streaming.StreamingQueryManager
 import org.apache.spark.util.{ShutdownHookManager, Utils}
 
 // SPARK-3729: Test key required to check for initialization errors with config.
@@ -492,30 +493,35 @@ private[hive] class TestHiveQueryExecution(
 }
 
 private[hive] class TestHiveSessionState(
-    sparkSession: TestHiveSparkSession,
+    sparkContext: SparkContext,
     conf: SQLConf,
     experimentalMethods: ExperimentalMethods,
     functionRegistry: org.apache.spark.sql.catalyst.analysis.FunctionRegistry,
     catalog: HiveSessionCatalog,
     sqlParser: ParserInterface,
-    metadataHive: HiveClient)
+    metadataHive: HiveClient,
+    analyzer: Analyzer,
+    streamingQueryManager: StreamingQueryManager,
+    queryExecutionCreator: LogicalPlan => TestHiveQueryExecution,
+    jarClassLoader: NonClosableMutableURLClassLoader,
+    plannerCreator: () => SparkPlanner)
   extends HiveSessionState(
-      sparkSession,
+      sparkContext,
       conf,
       experimentalMethods,
       functionRegistry,
       catalog,
       sqlParser,
-      metadataHive) {
-
-  override def executePlan(plan: LogicalPlan): TestHiveQueryExecution = {
-    new TestHiveQueryExecution(sparkSession, plan)
-  }
-}
+      metadataHive,
+      analyzer,
+      streamingQueryManager,
+      queryExecutionCreator,
+      jarClassLoader,
+      plannerCreator) {}
 
 private[hive] object TestHiveSessionState {
 
-  def makeTestConf: SQLConf = {
+  def createTestConf: SQLConf = {
     new SQLConf {
       clear()
       override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
@@ -527,17 +533,24 @@ private[hive] object TestHiveSessionState {
   }
 
   def apply(sparkSession: TestHiveSparkSession): TestHiveSessionState = {
-    val sqlConf = makeTestConf
-    val copyHelper = HiveSessionState(sparkSession, Some(sqlConf))
+    val sqlConf = createTestConf
+    val initHelper = HiveSessionState(sparkSession, Some(sqlConf))
+    val queryExecutionCreator = (plan: LogicalPlan) =>
+      new TestHiveQueryExecution(sparkSession, plan)
 
     new TestHiveSessionState(
-      sparkSession,
+      sparkSession.sparkContext,
       sqlConf,
-      copyHelper.experimentalMethods,
-      copyHelper.functionRegistry,
-      copyHelper.catalog,
-      copyHelper.sqlParser,
-      copyHelper.metadataHive)
+      initHelper.experimentalMethods,
+      initHelper.functionRegistry,
+      initHelper.catalog,
+      initHelper.sqlParser,
+      initHelper.metadataHive,
+      initHelper.analyzer,
+      initHelper.streamingQueryManager,
+      queryExecutionCreator,
+      initHelper.jarClassLoader,
+      initHelper.plannerCreator)
   }
 
 }

From 2cee190eb6c6902d39f68c25d928fbd5aaa522bc Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 16 Feb 2017 16:24:20 -0800
Subject: [PATCH 10/31] Fix initialization loop.

---
 .../org/apache/spark/sql/execution/datasources/rules.scala   | 2 +-
 .../org/apache/spark/sql/SparkSessionBuilderSuite.scala      | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 1c3e7c6d52239..bdffa047350a6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -67,7 +67,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
  * Preprocess [[CreateTable]], to do some normalization and checking.
  */
 case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] {
-  private val catalog = sparkSession.sessionState.catalog
+  private def catalog = sparkSession.sessionState.catalog
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
     // When we CREATE TABLE without specifying the table schema, we should fail the query if
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index d4f557c7a750a..4c65e128c3e15 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -188,4 +188,9 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
     forkedSession.stop()
     activeSession.stop()
   }
+
+  test("fork new session and run query on inherited table") {
+    val activeSession = SparkSession.builder().master("local").getOrCreate()
+
+  }
 }

From e2bbfa8c81f91c57f5628e771f42d414a1031d57 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 16 Feb 2017 17:07:31 -0800
Subject: [PATCH 11/31] Fix var name error.

---
 .../scala/org/apache/spark/sql/hive/HiveSessionState.scala   | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index e44d65648683a..7290528168c74 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -223,8 +223,8 @@ object HiveSessionState {
         catalog.ParquetConversions ::
         catalog.OrcConversions ::
         PreprocessTableCreation(sparkSession) ::
-        PreprocessTableInsertion(conf) ::
-        DataSourceAnalysis(conf) ::
+        PreprocessTableInsertion(sqlConf) ::
+        DataSourceAnalysis(sqlConf) ::
         HiveAnalysis :: Nil
 
       override val extendedCheckRules = Seq(PreWriteCheck)
@@ -249,7 +249,6 @@ object HiveSessionState {
           experimentalMethods.extraStrategies ++ Seq(
             FileSourceStrategy,
             DataSourceStrategy,
-            DDLStrategy,
             SpecialLimits,
             InMemoryScans,
             HiveTableScans,

From 8ac778ab444f90eadd22d36b91889d81ef593d44 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Fri, 17 Feb 2017 17:19:32 -0800
Subject: [PATCH 12/31] Add tests. Refactor. Temporarily disable subtest
 SPARK-18360: default table path of tables in default database should depend
 on the location of the default database.

---
 .../spark/sql/internal/SessionState.scala     |  4 +-
 .../spark/sql/SparkSessionBuilderSuite.scala  | 28 +++++++-
 .../spark/sql/hive/HiveSessionCatalog.scala   |  4 +-
 .../spark/sql/hive/HiveSessionState.scala     | 67 +++++++------------
 .../sql/hive/HiveSessionStateSuite.scala      | 54 +++++++++++++++
 .../spark/sql/hive/HiveSparkSubmitSuite.scala |  2 +
 6 files changed, 110 insertions(+), 49 deletions(-)
 create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 6d645c5b96b84..6b46f6e648c8d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -174,7 +174,7 @@ object SessionState {
     // Interface to start and stop [[StreamingQuery]]s.
     val streamingQueryManager: StreamingQueryManager = new StreamingQueryManager(sparkSession)
 
-    val queryExecution = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
+    val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
 
     new SessionState(
       sparkContext,
@@ -185,7 +185,7 @@ object SessionState {
       sqlParser,
       analyzer,
       streamingQueryManager,
-      queryExecution,
+      queryExecutionCreator,
       jarClassLoader)
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index 4c65e128c3e15..bdf6728995481 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -20,15 +20,16 @@ package org.apache.spark.sql
 import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.test.SharedSQLContext
 
 /**
  * Test cases for the builder pattern of [[SparkSession]].
  */
-class SparkSessionBuilderSuite extends SparkFunSuite {
+class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
 
   private var initialSession: SparkSession = _
 
-  private lazy val sparkContext: SparkContext = {
+  override lazy val sparkContext: SparkContext = {
     initialSession = SparkSession.builder()
       .master("local")
       .config("spark.ui.enabled", value = false)
@@ -190,7 +191,30 @@ class SparkSessionBuilderSuite extends SparkFunSuite {
   }
 
   test("fork new session and run query on inherited table") {
+    import testImplicits._
+
+    def checkTableExists(sparkSession: SparkSession): Unit = {
+      QueryTest.checkAnswer(sql(
+        """
+          |SELECT x.str, COUNT(*)
+          |FROM df x JOIN df y ON x.str = y.str
+          |GROUP BY x.str
+        """.stripMargin),
+        Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
+    }
+
     val activeSession = SparkSession.builder().master("local").getOrCreate()
+    SparkSession.setActiveSession(activeSession)
 
+    Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str").createOrReplaceTempView("df")
+    checkTableExists(activeSession)
+
+    val forkedSession = activeSession.cloneSession()
+    SparkSession.setActiveSession(forkedSession)
+    checkTableExists(forkedSession)
+
+    SparkSession.clearActiveSession()
+    forkedSession.stop()
+    activeSession.stop()
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 7685cecb684cd..62ebbeb10b4cc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -77,11 +77,11 @@ private[sql] class HiveSessionCatalog(
     metastoreCatalog.hiveDefaultTableFilePath(name)
   }
 
-  def copy(associatedSparkSession: SparkSession): HiveSessionCatalog = {
+  def copy(sparkSession: SparkSession): HiveSessionCatalog = {
     val catalog = new HiveSessionCatalog(
       externalCatalog,
       globalTempViewManager,
-      associatedSparkSession,
+      sparkSession,
       functionResourceLoader,
       functionRegistry,
       conf,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 7290528168c74..5345c964570a7 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -143,67 +143,48 @@ object HiveSessionState {
   }
 
   def apply(
-      associatedSparkSession: SparkSession,
+      sparkSession: SparkSession,
       conf: Option[SQLConf]): HiveSessionState = {
 
-    val sparkContext = associatedSparkSession.sparkContext
-
-    val sqlConf = conf.getOrElse(new SQLConf)
-
-    sparkContext.getConf.getAll.foreach { case (k, v) =>
-      sqlConf.setConfString(k, v)
-    }
-
-    val functionRegistry = FunctionRegistry.builtin.copy
+    val initHelper = SessionState(sparkSession, conf)
 
-    val experimentalMethods = new ExperimentalMethods
-
-    val jarClassLoader: NonClosableMutableURLClassLoader =
-      associatedSparkSession.sharedState.jarClassLoader
-
-    val functionResourceLoader: FunctionResourceLoader =
-      SessionState.createFunctionResourceLoader(sparkContext, jarClassLoader)
-
-    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
+    val sparkContext = sparkSession.sparkContext
 
     val catalog = new HiveSessionCatalog(
-      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
-      associatedSparkSession.sharedState.globalTempViewManager,
-      associatedSparkSession,
-      functionResourceLoader,
-      functionRegistry,
-      sqlConf,
-      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
-      sqlParser)
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
+      sparkSession.sharedState.globalTempViewManager,
+      sparkSession,
+      SessionState.createFunctionResourceLoader(sparkContext, initHelper.jarClassLoader),
+      initHelper.functionRegistry,
+      initHelper.conf,
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, initHelper.conf),
+      initHelper.sqlParser)
 
     // A Hive client used for interacting with the metastore.
     val metadataHive: HiveClient =
-      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
     // An analyzer that uses the Hive metastore.
-    val analyzer: Analyzer = createAnalyzer(associatedSparkSession, catalog, sqlConf)
-
-    val streamingQueryManager: StreamingQueryManager =
-      new StreamingQueryManager(associatedSparkSession)
-
-    val queryExecutionCreator = (plan: LogicalPlan) =>
-      new QueryExecution(associatedSparkSession, plan)
+    val analyzer: Analyzer = createAnalyzer(sparkSession, catalog, initHelper.conf)
 
-    val plannerCreator = createPlannerCreator(associatedSparkSession, sqlConf, experimentalMethods)
+    val plannerCreator = createPlannerCreator(
+      sparkSession,
+      initHelper.conf,
+      initHelper.experimentalMethods)
 
     new HiveSessionState(
       sparkContext,
-      sqlConf,
-      experimentalMethods,
-      functionRegistry,
+      initHelper.conf,
+      initHelper.experimentalMethods,
+      initHelper.functionRegistry,
       catalog,
-      sqlParser,
+      initHelper.sqlParser,
       metadataHive,
       analyzer,
-      streamingQueryManager,
-      queryExecutionCreator,
-      jarClassLoader,
+      initHelper.streamingQueryManager,
+      initHelper.queryExecutionCreator,
+      initHelper.jarClassLoader,
       plannerCreator)
   }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
new file mode 100644
index 0000000000000..335466373124e
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.sql.{QueryTest, Row, SparkSession}
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+import org.apache.spark.sql.test.SharedSQLContext
+
+class HiveSessionStateSuite  extends SparkFunSuite with SharedSQLContext {
+
+  test("fork new session and inherit a copy of the session state") {
+    val activeSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
+    val forkedSession = activeSession.cloneSession()
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState ne activeSession.sessionState)
+    assert(forkedSession.sessionState.isInstanceOf[HiveSessionState])
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
+
+  test("fork new session and inherit function registry and udf") {
+    val activeSession = SparkSession.builder().master("local").getOrCreate()
+    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
+    val forkedSession = activeSession.cloneSession()
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState.functionRegistry ne
+      activeSession.sessionState.functionRegistry)
+    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
+
+    forkedSession.stop()
+    activeSession.stop()
+  }
+
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 8f0d5d886c9d5..818ff55660125 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -862,6 +862,7 @@ object SPARK_18360 {
         schema = new StructType().add("i", "int"),
         provider = Some(DDLUtils.HIVE_PROVIDER))
 
+      /* TODO: SPARK-19540 re-enable this assert
       val newWarehousePath = Utils.createTempDir().getAbsolutePath
       hiveClient.runSqlHive(s"SET hive.metastore.warehouse.dir=$newWarehousePath")
       hiveClient.createTable(tableMeta, ignoreIfExists = false)
@@ -870,6 +871,7 @@ object SPARK_18360 {
       // location for tables in default database.
       assert(rawTable.storage.locationUri.get.contains(newWarehousePath))
       hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = false, purge = false)
+      */
 
       spark.sharedState.externalCatalog.createTable(tableMeta, ignoreIfExists = false)
       val readBack = spark.sharedState.externalCatalog.getTable("default", "test_tbl")

From 3c995e1b4b347fbe9f6339b6362f9ae906278136 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Mon, 20 Feb 2017 19:20:32 -0800
Subject: [PATCH 13/31] Fix copy of SessionCatalog. Changes from review.

---
 .../spark/sql/catalyst/CatalystConf.scala     | 22 +++++-
 .../catalyst/analysis/FunctionRegistry.scala  |  4 +-
 .../sql/catalyst/catalog/SessionCatalog.scala | 10 ++-
 .../org/apache/spark/sql/SparkSession.scala   | 11 +--
 .../spark/sql/internal/SessionState.scala     | 27 ++++---
 .../spark/sql/hive/HiveSessionCatalog.scala   | 74 ++++++++++++++-----
 .../spark/sql/hive/HiveSessionState.scala     | 28 ++++---
 .../spark/sql/hive/HiveSparkSubmitSuite.scala |  2 +
 8 files changed, 128 insertions(+), 50 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
index 5f50ce1ba68ff..bfa019363a97b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -60,6 +60,8 @@ trait CatalystConf {
    * Enables CBO for estimation of plan statistics when set true.
    */
   def cboEnabled: Boolean
+
+  def copy: CatalystConf
 }
 
 
@@ -77,4 +79,22 @@ case class SimpleCatalystConf(
     cboEnabled: Boolean = false,
     warehousePath: String = "/user/hive/warehouse",
     sessionLocalTimeZone: String = TimeZone.getDefault().getID)
-  extends CatalystConf
+  extends CatalystConf {
+
+  override def copy: SimpleCatalystConf = {
+    SimpleCatalystConf(
+      caseSensitiveAnalysis,
+      orderByOrdinal,
+      groupByOrdinal,
+      optimizerMaxIterations,
+      optimizerInSetConversionThreshold,
+      maxCaseBranchesForCodegen,
+      tableRelationCacheSize,
+      runSQLonFile,
+      crossJoinEnabled,
+      cboEnabled,
+      warehousePath,
+      sessionLocalTimeZone)
+  }
+
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 59286ad135975..8a623f5513162 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -152,9 +152,7 @@ object EmptyFunctionRegistry extends FunctionRegistry {
     throw new UnsupportedOperationException
   }
 
-  override def copy: FunctionRegistry = {
-    throw new UnsupportedOperationException
-  }
+  override def copy: FunctionRegistry = this
 }
 
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 24c25174a53fd..0ee18ad84a8bf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1184,8 +1184,16 @@ class SessionCatalog(
    * Get an identical copy of the `SessionCatalog`.
    * The temporary tables and function registry are retained.
    * The table relation cache will not be populated.
+   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep copy
+   * `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
+   * All arguments passed in should be associated with a particular `SparkSession`.
    */
-  def copy: SessionCatalog = {
+  def copy(
+      conf: CatalystConf,
+      hadoopConf: Configuration,
+      functionRegistry: FunctionRegistry,
+      parser: ParserInterface): SessionCatalog = {
+
     val catalog = new SessionCatalog(
       externalCatalog,
       globalTempViewManager,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 4f4da0ff8cd70..670fbfea4ac95 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -72,7 +72,7 @@ import org.apache.spark.util.Utils
 class SparkSession private(
     @transient val sparkContext: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
-    existingSessionState: Option[SessionState])
+    @transient private val parentSessionState: Option[SessionState])
   extends Serializable with Closeable with Logging { self =>
 
   private[sql] def this(sc: SparkContext, existingSharedState: Option[SharedState]) {
@@ -108,10 +108,11 @@ class SparkSession private(
   /**
    * State isolated across sessions, including SQL configurations, temporary tables, registered
    * functions, and everything else that accepts a [[org.apache.spark.sql.internal.SQLConf]].
+   * If `parentSessionState` is not null, the `SessionState` will be a copy of the parent.
    */
   @transient
   private[sql] lazy val sessionState: SessionState = {
-    existingSessionState
+    parentSessionState
       .map(_.copy(this))
       .getOrElse(SparkSession.instantiateSessionState(
         SparkSession.sessionStateClassName(sparkContext.conf),
@@ -211,8 +212,8 @@ class SparkSession private(
   /**
    * :: Experimental ::
    * Create an identical copy of this `SparkSession`, sharing the underlying `SparkContext`
-   * and cached data. SessionState (SQL configurations, temporary tables, registered functions)
-   * is also copied over.
+   * and cached data. All the state of this session (i.e. SQL configurations, temporary tables,
+   * registered functions) is also copied over.
    * Changes to base session are not propagated to cloned session, cloned is independent
    * after creation.
    *
@@ -982,7 +983,7 @@ object SparkSession {
   }
 
   /**
-   * Helper method to create an instance of `SessionState`
+   * Helper method to create an instance of `SessionState` based on `className` from conf.
    * The result is either `SessionState` or `HiveSessionState`
    */
   private def instantiateSessionState(
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 6b46f6e648c8d..b59d1f460e5e5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -88,25 +88,28 @@ private[sql] class SessionState(
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  def copy(associatedSparkSession: SparkSession): SessionState = {
-    val sqlConf = conf.copy
-    val catalogCopy = catalog.copy
-    val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
-    val queryExecution = (plan: LogicalPlan) => new QueryExecution(associatedSparkSession, plan)
+  def copy(sparkSession: SparkSession): SessionState = {
+    val sparkContext = sparkSession.sparkContext
+    val confCopy = conf.copy
+    val hadoopConfCopy = SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy)
+    val functionRegistryCopy = functionRegistry.copy
+    val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
+    val catalogCopy = catalog.copy(confCopy, hadoopConfCopy, functionRegistryCopy, sqlParser)
+    val queryExecution = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
 
-    associatedSparkSession.sparkContext.getConf.getAll.foreach { case (k, v) =>
-      sqlConf.setConfString(k, v)
+    sparkContext.getConf.getAll.foreach { case (k, v) =>
+      confCopy.setConfString(k, v)
     }
 
     new SessionState(
-      associatedSparkSession.sparkContext,
-      sqlConf,
+      sparkContext,
+      confCopy,
       experimentalMethods.copy,
-      functionRegistry.copy,
+      functionRegistryCopy,
       catalogCopy,
       sqlParser,
-      SessionState.createAnalyzer(associatedSparkSession, catalogCopy, sqlConf),
-      new StreamingQueryManager(associatedSparkSession),
+      SessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
+      new StreamingQueryManager(sparkSession),
       queryExecution,
       jarClassLoader)
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 62ebbeb10b4cc..4d11041cec48b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -43,31 +43,25 @@ import org.apache.spark.util.Utils
 private[sql] class HiveSessionCatalog(
     externalCatalog: HiveExternalCatalog,
     globalTempViewManager: GlobalTempViewManager,
-    sparkSession: SparkSession,
+    private val metastoreCatalog: HiveMetastoreCatalog,
     functionResourceLoader: FunctionResourceLoader,
     functionRegistry: FunctionRegistry,
     conf: SQLConf,
     hadoopConf: Configuration,
     parser: ParserInterface)
   extends SessionCatalog(
-    externalCatalog,
-    globalTempViewManager,
-    functionResourceLoader,
-    functionRegistry,
-    conf,
-    hadoopConf,
-    parser) {
+      externalCatalog,
+      globalTempViewManager,
+      functionResourceLoader,
+      functionRegistry,
+      conf,
+      hadoopConf,
+      parser) {
 
   // ----------------------------------------------------------------
   // | Methods and fields for interacting with HiveMetastoreCatalog |
   // ----------------------------------------------------------------
 
-  // Catalog for handling data source tables. TODO: This really doesn't belong here since it is
-  // essentially a cache for metastore tables. However, it relies on a lot of session-specific
-  // things so it would be a lot of work to split its functionality between HiveSessionCatalog
-  // and HiveCatalog. We should still do it at some point...
-  private val metastoreCatalog = new HiveMetastoreCatalog(sparkSession)
-
   // These 2 rules must be run before all other DDL post-hoc resolution rules, i.e.
   // `PreprocessTableCreation`, `PreprocessTableInsertion`, `DataSourceAnalysis` and `HiveAnalysis`.
   val ParquetConversions: Rule[LogicalPlan] = metastoreCatalog.ParquetConversions
@@ -77,8 +71,24 @@ private[sql] class HiveSessionCatalog(
     metastoreCatalog.hiveDefaultTableFilePath(name)
   }
 
-  def copy(sparkSession: SparkSession): HiveSessionCatalog = {
-    val catalog = new HiveSessionCatalog(
+  /**
+   * Get an identical copy of the `SessionCatalog`.
+   * The temporary tables and function registry are retained.
+   * The table relation cache will not be populated.
+   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep copy
+   * `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
+   * All arguments passed in should be associated with `sparkSession`.
+   * This should ideally override `SessionCatalog.copy` but does not at present, since
+   * `HiveMetastoreCatalog` is dependent on `SparkSession`.
+   */
+  def copy(
+      sparkSession: SparkSession,
+      conf: SQLConf,
+      hadoopConf: Configuration,
+      functionRegistry: FunctionRegistry,
+      parser: ParserInterface): HiveSessionCatalog = {
+
+    val catalog = HiveSessionCatalog(
       externalCatalog,
       globalTempViewManager,
       sparkSession,
@@ -98,7 +108,7 @@ private[sql] class HiveSessionCatalog(
   // For testing only
   private[hive] def getCachedDataSourceTable(table: TableIdentifier): LogicalPlan = {
     val key = metastoreCatalog.getQualifiedTableName(table)
-    sparkSession.sessionState.catalog.tableRelationCache.getIfPresent(key)
+    tableRelationCache.getIfPresent(key)
   }
 
   override def makeFunctionBuilder(funcName: String, className: String): FunctionBuilder = {
@@ -230,3 +240,33 @@ private[sql] class HiveSessionCatalog(
     "histogram_numeric"
   )
 }
+
+private[sql] object HiveSessionCatalog {
+
+  def apply(
+      externalCatalog: HiveExternalCatalog,
+      globalTempViewManager: GlobalTempViewManager,
+      sparkSession: SparkSession,
+      functionResourceLoader: FunctionResourceLoader,
+      functionRegistry: FunctionRegistry,
+      conf: SQLConf,
+      hadoopConf: Configuration,
+      parser: ParserInterface): HiveSessionCatalog = {
+
+    // Catalog for handling data source tables. TODO: This really doesn't belong here since it is
+    // essentially a cache for metastore tables. However, it relies on a lot of session-specific
+    // things so it would be a lot of work to split its functionality between HiveSessionCatalog
+    // and HiveCatalog. We should still do it at some point...
+    val metastoreCatalog = new HiveMetastoreCatalog(sparkSession)
+
+    new HiveSessionCatalog(
+      externalCatalog: HiveExternalCatalog,
+      globalTempViewManager: GlobalTempViewManager,
+      metastoreCatalog,
+      functionResourceLoader: FunctionResourceLoader,
+      functionRegistry: FunctionRegistry,
+      conf: SQLConf,
+      hadoopConf: Configuration,
+      parser: ParserInterface)
+  }
+}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 5345c964570a7..b31fa47e4ba31 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -108,29 +108,35 @@ private[hive] class HiveSessionState(
     conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
   }
 
-  override def copy(associatedSparkSession: SparkSession): HiveSessionState = {
-    val sqlConf = conf.copy
-    val copyHelper = SessionState(associatedSparkSession, Some(sqlConf))
-    val catalogCopy = catalog.copy(associatedSparkSession)
+  override def copy(sparkSession: SparkSession): HiveSessionState = {
+    val sparkContext = sparkSession.sparkContext
+    val confCopy = conf.copy
+    val copyHelper = SessionState(sparkSession, Some(confCopy))
+    val catalogCopy = catalog.copy(
+      sparkSession,
+      confCopy,
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
+      copyHelper.functionRegistry,
+      copyHelper.sqlParser)
     val hiveClient =
-      associatedSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
     new HiveSessionState(
-      associatedSparkSession.sparkContext,
-      sqlConf,
+      sparkContext,
+      confCopy,
       copyHelper.experimentalMethods,
       copyHelper.functionRegistry,
       catalogCopy,
       copyHelper.sqlParser,
       hiveClient,
-      HiveSessionState.createAnalyzer(associatedSparkSession, catalogCopy, sqlConf),
+      HiveSessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
       copyHelper.streamingQueryManager,
       copyHelper.queryExecutionCreator,
       jarClassLoader,
       HiveSessionState.createPlannerCreator(
-        associatedSparkSession,
-        sqlConf,
+        sparkSession,
+        confCopy,
         copyHelper.experimentalMethods))
   }
 
@@ -150,7 +156,7 @@ object HiveSessionState {
 
     val sparkContext = sparkSession.sparkContext
 
-    val catalog = new HiveSessionCatalog(
+    val catalog = HiveSessionCatalog(
       sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
       sparkSession.sharedState.globalTempViewManager,
       sparkSession,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 818ff55660125..377aa1c8bc334 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -217,6 +217,7 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
+  /*  TODO: SPARK-19540 re-enable this test
   test("set hive.metastore.warehouse.dir") {
     // In this test, we set hive.metastore.warehouse.dir in hive-site.xml but
     // not set spark.sql.warehouse.dir. So, the warehouse dir should be
@@ -255,6 +256,7 @@ class HiveSparkSubmitSuite
       unusedJar.toString)
     runSparkSubmit(args)
   }
+  */
 
   test("SPARK-16901: set javax.jdo.option.ConnectionURL") {
     // In this test, we set javax.jdo.option.ConnectionURL and set metastore version to

From 295ee41ca7c5d622c823b730e7076590f90b1330 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Mon, 20 Feb 2017 21:54:46 -0800
Subject: [PATCH 14/31] Add synchronized blocks. Ignore hive metastore tests
 for now.

---
 .../spark/sql/catalyst/CatalystConf.scala     | 19 ++-----------------
 .../sql/catalyst/catalog/SessionCatalog.scala | 10 ++++++----
 .../apache/spark/sql/internal/SQLConf.scala   |  2 +-
 .../spark/sql/internal/SessionState.scala     |  2 +-
 .../spark/sql/test/TestSQLContext.scala       |  2 +-
 .../spark/sql/hive/HiveSessionCatalog.scala   |  8 +++++---
 .../spark/sql/hive/HiveSessionState.scala     |  2 +-
 .../spark/sql/hive/HiveSparkSubmitSuite.scala | 10 ++++------
 8 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
index bfa019363a97b..9ebac433bb736 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -61,7 +61,7 @@ trait CatalystConf {
    */
   def cboEnabled: Boolean
 
-  def copy: CatalystConf
+  override def clone: CatalystConf = throw new CloneNotSupportedException()
 }
 
 
@@ -81,20 +81,5 @@ case class SimpleCatalystConf(
     sessionLocalTimeZone: String = TimeZone.getDefault().getID)
   extends CatalystConf {
 
-  override def copy: SimpleCatalystConf = {
-    SimpleCatalystConf(
-      caseSensitiveAnalysis,
-      orderByOrdinal,
-      groupByOrdinal,
-      optimizerMaxIterations,
-      optimizerInSetConversionThreshold,
-      maxCaseBranchesForCodegen,
-      tableRelationCacheSize,
-      runSQLonFile,
-      crossJoinEnabled,
-      cboEnabled,
-      warehousePath,
-      sessionLocalTimeZone)
-  }
-
+  override def clone: SimpleCatalystConf = this.copy()
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 50d734ff5e5c4..1f54093a554d1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -74,7 +74,7 @@ class SessionCatalog(
 
   // For testing only.
   def this(externalCatalog: ExternalCatalog) {
-    this(externalCatalog, new SimpleFunctionRegistry, new SimpleCatalystConf(true))
+    this(externalCatalog, new SimpleFunctionRegistry, SimpleCatalystConf(true))
   }
 
   /** List of temporary tables, mapping from table name to their logical plan. */
@@ -1201,9 +1201,11 @@ class SessionCatalog(
       hadoopConf,
       parser)
 
-    catalog.currentDb = currentDb
-    // copy over temporary tables
-    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
+    synchronized {
+      catalog.currentDb = currentDb
+      // copy over temporary tables
+      tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
+    }
 
     catalog
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d931871a7c9e2..32f7a58ecdadf 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -998,7 +998,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
     settings.clear()
   }
 
-  def copy: SQLConf = {
+  override def clone: SQLConf = {
     val result = new SQLConf
     getAllConfs.foreach {
       case(k, v) => if (v ne null) result.setConfString(k, v)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index b59d1f460e5e5..730de044c8459 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -90,7 +90,7 @@ private[sql] class SessionState(
    */
   def copy(sparkSession: SparkSession): SessionState = {
     val sparkContext = sparkSession.sparkContext
-    val confCopy = conf.copy
+    val confCopy = conf.clone
     val hadoopConfCopy = SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy)
     val functionRegistryCopy = functionRegistry.copy
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index 3b17d18ea5be7..4395568e5c5a6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -96,7 +96,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
   }
 
   @transient
-  protected[sql] override lazy val sessionState: SessionState = TestSessionState(this)
+  override lazy val sessionState: SessionState = TestSessionState(this)
 
   // Needed for Java tests
   def loadTestData(): Unit = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 4d11041cec48b..e27387651a5ae 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -98,9 +98,11 @@ private[sql] class HiveSessionCatalog(
       hadoopConf,
       parser)
 
-    catalog.currentDb = currentDb
-    // copy over temporary tables
-    tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
+    synchronized {
+      catalog.currentDb = currentDb
+      // copy over temporary tables
+      tempTables.foreach(kv => catalog.tempTables.put(kv._1, kv._2))
+    }
 
     catalog
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index b31fa47e4ba31..9c80ba584c5dd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -110,7 +110,7 @@ private[hive] class HiveSessionState(
 
   override def copy(sparkSession: SparkSession): HiveSessionState = {
     val sparkContext = sparkSession.sparkContext
-    val confCopy = conf.copy
+    val confCopy = conf.clone
     val copyHelper = SessionState(sparkSession, Some(confCopy))
     val catalogCopy = catalog.copy(
       sparkSession,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 377aa1c8bc334..06bbcbc134c27 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -217,8 +217,8 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
-  /*  TODO: SPARK-19540 re-enable this test
-  test("set hive.metastore.warehouse.dir") {
+  // TODO: SPARK-19540 re-enable this test
+  ignore("set hive.metastore.warehouse.dir") {
     // In this test, we set hive.metastore.warehouse.dir in hive-site.xml but
     // not set spark.sql.warehouse.dir. So, the warehouse dir should be
     // the value of hive.metastore.warehouse.dir. Also, the value of
@@ -256,7 +256,6 @@ class HiveSparkSubmitSuite
       unusedJar.toString)
     runSparkSubmit(args)
   }
-  */
 
   test("SPARK-16901: set javax.jdo.option.ConnectionURL") {
     // In this test, we set javax.jdo.option.ConnectionURL and set metastore version to
@@ -299,7 +298,8 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
-  test("SPARK-18360: default table path of tables in default database should depend on the " +
+  // TODO: SPARK-19540 re-enable this test
+  ignore("SPARK-18360: default table path of tables in default database should depend on the " +
     "location of default database") {
     val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
     val args = Seq(
@@ -864,7 +864,6 @@ object SPARK_18360 {
         schema = new StructType().add("i", "int"),
         provider = Some(DDLUtils.HIVE_PROVIDER))
 
-      /* TODO: SPARK-19540 re-enable this assert
       val newWarehousePath = Utils.createTempDir().getAbsolutePath
       hiveClient.runSqlHive(s"SET hive.metastore.warehouse.dir=$newWarehousePath")
       hiveClient.createTable(tableMeta, ignoreIfExists = false)
@@ -873,7 +872,6 @@ object SPARK_18360 {
       // location for tables in default database.
       assert(rawTable.storage.locationUri.get.contains(newWarehousePath))
       hiveClient.dropTable("default", "test_tbl", ignoreIfNotExists = false, purge = false)
-      */
 
       spark.sharedState.externalCatalog.createTable(tableMeta, ignoreIfExists = false)
       val readBack = spark.sharedState.externalCatalog.getTable("default", "test_tbl")

From 9beba844123f3eec7d519333ecb7a60cde8da9c4 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 22 Feb 2017 11:15:27 -0800
Subject: [PATCH 15/31] Add tests. Force copy of session state on cloneSession.

---
 .../sql/catalyst/catalog/SessionCatalog.scala |  5 +-
 .../catalog/SessionCatalogSuite.scala         | 27 +++++++-
 .../org/apache/spark/sql/SparkSession.scala   |  9 ++-
 .../spark/sql/SparkSessionBuilderSuite.scala  | 38 ++++++++---
 .../sql/internal/SQLConfEntrySuite.scala      | 17 +++++
 .../spark/sql/hive/HiveSessionCatalog.scala   |  8 +--
 .../spark/sql/hive/HiveSessionState.scala     |  2 -
 .../sql/hive/HiveSessionCatalogSuite.scala    | 63 +++++++++++++++++++
 .../sql/hive/HiveSessionStateSuite.scala      | 54 ----------------
 9 files changed, 148 insertions(+), 75 deletions(-)
 create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
 delete mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 1f54093a554d1..e3a4d0543e2c7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -74,7 +74,10 @@ class SessionCatalog(
 
   // For testing only.
   def this(externalCatalog: ExternalCatalog) {
-    this(externalCatalog, new SimpleFunctionRegistry, SimpleCatalystConf(true))
+    this(
+      externalCatalog,
+      new SimpleFunctionRegistry,
+      SimpleCatalystConf(caseSensitiveAnalysis = true))
   }
 
   /** List of temporary tables, mapping from table name to their logical plan. */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 44434324d3770..106fcddac0d77 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -17,8 +17,10 @@
 
 package org.apache.spark.sql.catalyst.catalog
 
+import org.apache.hadoop.conf.Configuration
+
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.{FunctionIdentifier, SimpleCatalystConf, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
@@ -1196,4 +1198,27 @@ class SessionCatalogSuite extends PlanTest {
       catalog.listFunctions("unknown_db", "func*")
     }
   }
+
+  test("copy SessionCatalog") {
+    val externalCatalog = newEmptyCatalog()
+    val original = new SessionCatalog(externalCatalog)
+    val tempTable1 = Range(1, 10, 1, 10)
+    original.createTempView("copytest1", tempTable1, overrideIfExists = false)
+
+    // check if tables copied over
+    val clone = original.copy(
+      SimpleCatalystConf(caseSensitiveAnalysis = true),
+      new Configuration(),
+      new SimpleFunctionRegistry,
+      CatalystSqlParser)
+    assert(clone.getTempView("copytest1") == Option(tempTable1))
+
+    // check if clone and original independent
+    clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
+    assert(original.getTempView("copytest1") == Option(tempTable1))
+
+    val tempTable2 = Range(1, 20, 2, 10)
+    original.createTempView("copytest2", tempTable2, overrideIfExists = false)
+    assert(clone.getTempView("copytest2").isEmpty)
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 6db872f56f378..6f99beece1531 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -237,7 +237,9 @@ class SparkSession private(
   @Experimental
   @InterfaceStability.Evolving
   def cloneSession(): SparkSession = {
-    new SparkSession(sparkContext, Some(sharedState), Some(sessionState))
+    val result = new SparkSession(sparkContext, Some(sharedState), Some(sessionState))
+    result.sessionState // force copy of SessionState
+    result
   }
 
 
@@ -867,6 +869,7 @@ object SparkSession {
         if (options.nonEmpty) {
           logWarning("Using an existing SparkSession; some configuration may not take effect.")
         }
+        logInfo("1 using current thread's active session")
         return session
       }
 
@@ -879,6 +882,7 @@ object SparkSession {
           if (options.nonEmpty) {
             logWarning("Using an existing SparkSession; some configuration may not take effect.")
           }
+          logInfo("2 using global session")
           return session
         }
 
@@ -898,6 +902,7 @@ object SparkSession {
           if (!sc.conf.contains("spark.app.name")) {
             sc.conf.setAppName(randomAppName)
           }
+          logDebug("3 using new sparkContext")
           sc
         }
         session = new SparkSession(sparkContext)
@@ -914,7 +919,7 @@ object SparkSession {
           }
         })
       }
-
+      logInfo("4 using created session")
       return session
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index bdf6728995481..cd0dadac725c9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -133,25 +133,34 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
 
     assert(forkedSession ne activeSession)
     assert(forkedSession.sessionState ne activeSession.sessionState)
+    assert(forkedSession.conf ne activeSession.conf)
+    // the rest of copying is tested individually for each field
 
     forkedSession.stop()
-    activeSession.stop()
   }
 
-  test("fork new session and inherit sql config options") {
+  test("fork new session and inherit RuntimeConfig options") {
     val activeSession = SparkSession
       .builder()
       .master("local")
       .getOrCreate()
-    activeSession.conf.set("spark-configb", "b")
-    val forkedSession = activeSession.cloneSession()
 
+    val key = "spark-config-clone"
+    activeSession.conf.set(key, "active")
+
+    // inheritance
+    val forkedSession = activeSession.cloneSession()
     assert(forkedSession ne activeSession)
     assert(forkedSession.conf ne activeSession.conf)
-    assert(forkedSession.conf.get("spark-configb") == "b")
+    assert(forkedSession.conf.get(key) == "active")
+
+    // independence
+    forkedSession.conf.set(key, "forked")
+    assert(activeSession.conf.get(key) == "active")
+    activeSession.conf.set(key, "dontcopyme")
+    assert(forkedSession.conf.get(key) == "forked")
 
     forkedSession.stop()
-    activeSession.stop()
   }
 
   test("fork new session and inherit function registry and udf") {
@@ -159,13 +168,19 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
     activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
     val forkedSession = activeSession.cloneSession()
 
+    // inheritance
     assert(forkedSession ne activeSession)
     assert(forkedSession.sessionState.functionRegistry ne
       activeSession.sessionState.functionRegistry)
     assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
 
+    // independence
+    forkedSession.sessionState.functionRegistry.dropFunction("strlenScala")
+    assert(activeSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
+    activeSession.udf.register("addone", (_: Int) + 1)
+    assert(forkedSession.sessionState.functionRegistry.lookupFunction("addone").isEmpty)
+
     forkedSession.stop()
-    activeSession.stop()
   }
 
   test("fork new session and inherit experimental methods") {
@@ -182,12 +197,18 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
 
     val forkedSession = activeSession.cloneSession()
 
+    // inheritance
     assert(forkedSession ne activeSession)
     assert(forkedSession.experimental.extraOptimizations.toSet ==
       activeSession.experimental.extraOptimizations.toSet)
 
+    // independence
+    forkedSession.experimental.extraOptimizations = List(DummyRule2)
+    assert(activeSession.experimental.extraOptimizations == optimizations)
+    activeSession.experimental.extraOptimizations = List(DummyRule1)
+    assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
+
     forkedSession.stop()
-    activeSession.stop()
   }
 
   test("fork new session and run query on inherited table") {
@@ -215,6 +236,5 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
 
     SparkSession.clearActiveSession()
     forkedSession.stop()
-    activeSession.stop()
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
index 0e3a5ca9d71dd..987b9919b6967 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
@@ -187,4 +187,21 @@ class SQLConfEntrySuite extends SparkFunSuite {
     }
     assert(e2.getMessage === "The maximum size of the cache must not be negative")
   }
+
+  test("clone SQLConf") {
+    val original = new SQLConf
+    val key = "spark.sql.SQLConfEntrySuite.clone"
+    assert(original.getConfString(key, "noentry") === "noentry")
+
+    // inheritance
+    original.setConfString(key, "orig")
+    val clone = original.clone
+    assert(clone.getConfString(key, "noentry") === "orig")
+
+    // independence
+    clone.setConfString(key, "clone")
+    assert(original.getConfString(key, "noentry") === "orig")
+    original.setConfString(key, "dontcopyme")
+    assert(clone.getConfString(key, "noentry") === "clone")
+  }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index e27387651a5ae..725e6ba4dedfc 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -89,8 +89,6 @@ private[sql] class HiveSessionCatalog(
       parser: ParserInterface): HiveSessionCatalog = {
 
     val catalog = HiveSessionCatalog(
-      externalCatalog,
-      globalTempViewManager,
       sparkSession,
       functionResourceLoader,
       functionRegistry,
@@ -246,8 +244,6 @@ private[sql] class HiveSessionCatalog(
 private[sql] object HiveSessionCatalog {
 
   def apply(
-      externalCatalog: HiveExternalCatalog,
-      globalTempViewManager: GlobalTempViewManager,
       sparkSession: SparkSession,
       functionResourceLoader: FunctionResourceLoader,
       functionRegistry: FunctionRegistry,
@@ -262,8 +258,8 @@ private[sql] object HiveSessionCatalog {
     val metastoreCatalog = new HiveMetastoreCatalog(sparkSession)
 
     new HiveSessionCatalog(
-      externalCatalog: HiveExternalCatalog,
-      globalTempViewManager: GlobalTempViewManager,
+      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
+      sparkSession.sharedState.globalTempViewManager,
       metastoreCatalog,
       functionResourceLoader: FunctionResourceLoader,
       functionRegistry: FunctionRegistry,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 9c80ba584c5dd..354e242c51524 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -157,8 +157,6 @@ object HiveSessionState {
     val sparkContext = sparkSession.sparkContext
 
     val catalog = HiveSessionCatalog(
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
-      sparkSession.sharedState.globalTempViewManager,
       sparkSession,
       SessionState.createFunctionResourceLoader(sparkContext, initHelper.jarClassLoader),
       initHelper.functionRegistry,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
new file mode 100644
index 0000000000000..6cf370e15f62f
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
+import org.apache.spark.sql.catalyst.catalog._
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.catalyst.plans.logical.Range
+import org.apache.spark.sql.internal.SQLConf
+
+class HiveSessionCatalogSuite extends SessionCatalogSuite {
+
+  test("clone HiveSessionCatalog") {
+    val sparkSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
+    val original = HiveSessionCatalog(
+      sparkSession,
+      DummyFunctionResourceLoader,
+      new SimpleFunctionRegistry,
+      new SQLConf,
+      new Configuration(),
+      CatalystSqlParser)
+
+    val tempTable1 = Range(1, 10, 1, 10)
+    original.createTempView("copytest1", tempTable1, overrideIfExists = false)
+
+    // check if tables copied over
+    val clone = original.copy(
+      sparkSession,
+      new SQLConf,
+      new Configuration(),
+      new SimpleFunctionRegistry,
+      CatalystSqlParser)
+    assert(clone.getTempView("copytest1") == Option(tempTable1))
+
+    // check if clone and original independent
+    clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
+    assert(original.getTempView("copytest1") == Option(tempTable1))
+
+    val tempTable2 = Range(1, 20, 2, 10)
+    original.createTempView("copytest2", tempTable2, overrideIfExists = false)
+    assert(clone.getTempView("copytest2").isEmpty)
+  }
+
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
deleted file mode 100644
index 335466373124e..0000000000000
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.hive
-
-import org.apache.spark.{SparkContext, SparkFunSuite}
-import org.apache.spark.sql.{QueryTest, Row, SparkSession}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.test.SharedSQLContext
-
-class HiveSessionStateSuite  extends SparkFunSuite with SharedSQLContext {
-
-  test("fork new session and inherit a copy of the session state") {
-    val activeSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
-    val forkedSession = activeSession.cloneSession()
-
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState ne activeSession.sessionState)
-    assert(forkedSession.sessionState.isInstanceOf[HiveSessionState])
-
-    forkedSession.stop()
-    activeSession.stop()
-  }
-
-  test("fork new session and inherit function registry and udf") {
-    val activeSession = SparkSession.builder().master("local").getOrCreate()
-    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
-    val forkedSession = activeSession.cloneSession()
-
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState.functionRegistry ne
-      activeSession.sessionState.functionRegistry)
-    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
-
-    forkedSession.stop()
-    activeSession.stop()
-  }
-
-}

From 3d2e4a62321b8441f3abe59a303e1353243e226e Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 22 Feb 2017 14:42:16 -0800
Subject: [PATCH 16/31] Rename copy to clone() to work around copy method of
 case classes. Move addJar from SessionState to SharedState.

---
 .../spark/sql/catalyst/CatalystConf.scala     |  4 +-
 .../catalyst/analysis/FunctionRegistry.scala  |  6 +--
 .../sql/catalyst/catalog/SessionCatalog.scala |  2 +-
 .../catalog/SessionCatalogSuite.scala         |  2 +-
 .../org/apache/spark/sql/SparkSession.scala   |  2 +-
 .../apache/spark/sql/internal/SQLConf.scala   |  2 +-
 .../spark/sql/internal/SessionState.scala     | 51 ++++++-------------
 .../spark/sql/internal/SharedState.scala      | 18 +++++++
 .../spark/sql/SparkSessionBuilderSuite.scala  |  5 +-
 .../sql/internal/SQLConfEntrySuite.scala      |  2 +-
 .../spark/sql/test/TestSQLContext.scala       | 15 +++---
 .../spark/sql/hive/HiveSessionCatalog.scala   |  2 +-
 .../spark/sql/hive/HiveSessionState.scala     | 23 ++++-----
 .../apache/spark/sql/hive/test/TestHive.scala |  6 +--
 .../sql/hive/HiveSessionCatalogSuite.scala    |  2 +-
 15 files changed, 68 insertions(+), 74 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
index 9ebac433bb736..56bfc00f97088 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystConf.scala
@@ -61,7 +61,7 @@ trait CatalystConf {
    */
   def cboEnabled: Boolean
 
-  override def clone: CatalystConf = throw new CloneNotSupportedException()
+  override def clone(): CatalystConf = throw new CloneNotSupportedException()
 }
 
 
@@ -81,5 +81,5 @@ case class SimpleCatalystConf(
     sessionLocalTimeZone: String = TimeZone.getDefault().getID)
   extends CatalystConf {
 
-  override def clone: SimpleCatalystConf = this.copy()
+  override def clone(): SimpleCatalystConf = this.copy()
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 8a623f5513162..7ad03a67a1c63 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -65,7 +65,7 @@ trait FunctionRegistry {
   def clear(): Unit
 
   /** Create a copy of this registry with identical functions as this registry. */
-  def copy: FunctionRegistry
+  override def clone(): FunctionRegistry = throw new CloneNotSupportedException()
 }
 
 class SimpleFunctionRegistry extends FunctionRegistry {
@@ -109,7 +109,7 @@ class SimpleFunctionRegistry extends FunctionRegistry {
     functionBuilders.clear()
   }
 
-  override def copy: FunctionRegistry = synchronized {
+  override def clone(): FunctionRegistry = synchronized {
     val registry = new SimpleFunctionRegistry
     functionBuilders.iterator.foreach { case (name, (info, builder)) =>
       registry.registerFunction(name, info, builder)
@@ -152,7 +152,7 @@ object EmptyFunctionRegistry extends FunctionRegistry {
     throw new UnsupportedOperationException
   }
 
-  override def copy: FunctionRegistry = this
+  override def clone(): FunctionRegistry = this
 }
 
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index e3a4d0543e2c7..3334faeb37cf4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1189,7 +1189,7 @@ class SessionCatalog(
    * `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
    * All arguments passed in should be associated with a particular `SparkSession`.
    */
-  def copy(
+  def clone(
       conf: CatalystConf,
       hadoopConf: Configuration,
       functionRegistry: FunctionRegistry,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 106fcddac0d77..a5cd95294b5be 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1206,7 +1206,7 @@ class SessionCatalogSuite extends PlanTest {
     original.createTempView("copytest1", tempTable1, overrideIfExists = false)
 
     // check if tables copied over
-    val clone = original.copy(
+    val clone = original.clone(
       SimpleCatalystConf(caseSensitiveAnalysis = true),
       new Configuration(),
       new SimpleFunctionRegistry,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 6f99beece1531..5c9cc05a9d6cd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -123,7 +123,7 @@ class SparkSession private(
   @transient
   lazy val sessionState: SessionState = {
     parentSessionState
-      .map(_.copy(this))
+      .map(_.clone(this))
       .getOrElse(SparkSession.instantiateSessionState(
         SparkSession.sessionStateClassName(sparkContext.conf),
         self))
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 32f7a58ecdadf..f75adcae5ad58 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -998,7 +998,7 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
     settings.clear()
   }
 
-  override def clone: SQLConf = {
+  override def clone(): SQLConf = {
     val result = new SQLConf
     getAllConfs.foreach {
       case(k, v) => if (v ne null) result.setConfString(k, v)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 730de044c8459..a5f51d8b9c980 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -40,6 +40,7 @@ import org.apache.spark.sql.util.ExecutionListenerManager
  */
 private[sql] class SessionState(
     sparkContext: SparkContext,
+    sharedState: SharedState,
     val conf: SQLConf,
     val experimentalMethods: ExperimentalMethods,
     val functionRegistry: FunctionRegistry,
@@ -47,8 +48,7 @@ private[sql] class SessionState(
     val sqlParser: ParserInterface,
     val analyzer: Analyzer,
     val streamingQueryManager: StreamingQueryManager,
-    val queryExecutionCreator: LogicalPlan => QueryExecution,
-    val jarClassLoader: NonClosableMutableURLClassLoader) {
+    val queryExecutionCreator: LogicalPlan => QueryExecution) {
 
   /*
    * Interface exposed to the user for registering user-defined functions.
@@ -88,13 +88,13 @@ private[sql] class SessionState(
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  def copy(sparkSession: SparkSession): SessionState = {
+  def clone(sparkSession: SparkSession): SessionState = {
     val sparkContext = sparkSession.sparkContext
-    val confCopy = conf.clone
+    val confCopy = conf.clone()
     val hadoopConfCopy = SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy)
-    val functionRegistryCopy = functionRegistry.copy
+    val functionRegistryCopy = functionRegistry.clone()
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
-    val catalogCopy = catalog.copy(confCopy, hadoopConfCopy, functionRegistryCopy, sqlParser)
+    val catalogCopy = catalog.clone(confCopy, hadoopConfCopy, functionRegistryCopy, sqlParser)
     val queryExecution = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
 
     sparkContext.getConf.getAll.foreach { case (k, v) =>
@@ -103,6 +103,7 @@ private[sql] class SessionState(
 
     new SessionState(
       sparkContext,
+      sparkSession.sharedState,
       confCopy,
       experimentalMethods.copy,
       functionRegistryCopy,
@@ -110,8 +111,7 @@ private[sql] class SessionState(
       sqlParser,
       SessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
       new StreamingQueryManager(sparkSession),
-      queryExecution,
-      jarClassLoader)
+      queryExecution)
   }
 
   // ------------------------------------------------------
@@ -124,7 +124,7 @@ private[sql] class SessionState(
     catalog.refreshTable(sqlParser.parseTableIdentifier(tableName))
   }
 
-  def addJar(path: String): Unit = SessionState.addJar(sparkContext, path, jarClassLoader)
+  def addJar(path: String): Unit = sharedState.addJar(path)
 
 }
 
@@ -150,13 +150,11 @@ object SessionState {
     }
 
     // Internal catalog for managing functions registered by the user.
-    val functionRegistry = FunctionRegistry.builtin.copy
-
-    val jarClassLoader: NonClosableMutableURLClassLoader = sparkSession.sharedState.jarClassLoader
+    val functionRegistry = FunctionRegistry.builtin.clone()
 
     // A class for loading resources specified by a function.
     val functionResourceLoader: FunctionResourceLoader =
-      createFunctionResourceLoader(sparkContext, jarClassLoader)
+      createFunctionResourceLoader(sparkContext, sparkSession.sharedState)
 
     // Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
@@ -181,6 +179,7 @@ object SessionState {
 
     new SessionState(
       sparkContext,
+      sparkSession.sharedState,
       sqlConf,
       new ExperimentalMethods,
       functionRegistry,
@@ -188,18 +187,17 @@ object SessionState {
       sqlParser,
       analyzer,
       streamingQueryManager,
-      queryExecutionCreator,
-      jarClassLoader)
+      queryExecutionCreator)
   }
 
   def createFunctionResourceLoader(
       sparkContext: SparkContext,
-      jarClassLoader: NonClosableMutableURLClassLoader): FunctionResourceLoader = {
+      sharedState: SharedState): FunctionResourceLoader = {
 
     new FunctionResourceLoader {
       override def loadResource(resource: FunctionResource): Unit = {
         resource.resourceType match {
-          case JarResource => addJar(sparkContext, resource.uri, jarClassLoader)
+          case JarResource => sharedState.addJar(resource.uri)
           case FileResource => sparkContext.addFile(resource.uri)
           case ArchiveResource =>
             throw new AnalysisException(
@@ -235,23 +233,4 @@ object SessionState {
     }
   }
 
-  def addJar(
-      sparkContext: SparkContext,
-      path: String,
-      jarClassLoader: NonClosableMutableURLClassLoader): Unit = {
-
-    sparkContext.addJar(path)
-
-    val uri = new Path(path).toUri
-    val jarURL = if (uri.getScheme == null) {
-      // `path` is a local file path without a URL scheme
-      new File(path).toURI.toURL
-    } else {
-      // `path` is a URL with a scheme
-      uri.toURL
-    }
-    jarClassLoader.addURL(jarURL)
-    Thread.currentThread().setContextClassLoader(jarClassLoader)
-  }
-
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 7ce9938f0d075..6488591612cb7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,10 +17,13 @@
 
 package org.apache.spark.sql.internal
 
+import java.io.File
+
 import scala.reflect.ClassTag
 import scala.util.control.NonFatal
 
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
 
 import org.apache.spark.{SparkConf, SparkContext, SparkException}
 import org.apache.spark.internal.Logging
@@ -136,6 +139,21 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
     }
     SparkSession.sqlListener.get()
   }
+
+  def addJar(path: String): Unit = {
+    sparkContext.addJar(path)
+
+    val uri = new Path(path).toUri
+    val jarURL = if (uri.getScheme == null) {
+      // `path` is a local file path without a URL scheme
+      new File(path).toURI.toURL
+    } else {
+      // `path` is a URL with a scheme
+      uri.toURL
+    }
+    jarClassLoader.addURL(jarURL)
+    Thread.currentThread().setContextClassLoader(jarClassLoader)
+  }
 }
 
 object SharedState {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index cd0dadac725c9..bcd971ae818d9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -212,10 +212,8 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
   }
 
   test("fork new session and run query on inherited table") {
-    import testImplicits._
-
     def checkTableExists(sparkSession: SparkSession): Unit = {
-      QueryTest.checkAnswer(sql(
+      QueryTest.checkAnswer(sparkSession.sql(
         """
           |SELECT x.str, COUNT(*)
           |FROM df x JOIN df y ON x.str = y.str
@@ -226,6 +224,7 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
 
     val activeSession = SparkSession.builder().master("local").getOrCreate()
     SparkSession.setActiveSession(activeSession)
+    import activeSession.implicits._
 
     Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str").createOrReplaceTempView("df")
     checkTableExists(activeSession)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
index 987b9919b6967..16273da754ee9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
@@ -195,7 +195,7 @@ class SQLConfEntrySuite extends SparkFunSuite {
 
     // inheritance
     original.setConfString(key, "orig")
-    val clone = original.clone
+    val clone = original.clone()
     assert(clone.getConfString(key, "noentry") === "orig")
 
     // independence
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index 4395568e5c5a6..0bcfbb110c1e2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.QueryExecution
-import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SessionState, SQLConf}
+import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
 import org.apache.spark.sql.streaming.StreamingQueryManager
 
 /**
@@ -42,6 +42,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
 
   class TestSessionState(
       sparkContext: SparkContext,
+      sharedState: SharedState,
       conf: SQLConf,
       experimentalMethods: ExperimentalMethods,
       functionRegistry: FunctionRegistry,
@@ -49,10 +50,10 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
       sqlParser: ParserInterface,
       analyzer: Analyzer,
       streamingQueryManager: StreamingQueryManager,
-      queryExecution: LogicalPlan => QueryExecution,
-      jarClassLoader: NonClosableMutableURLClassLoader)
+      queryExecution: LogicalPlan => QueryExecution)
     extends SessionState(
         sparkContext,
+        sharedState,
         conf,
         experimentalMethods,
         functionRegistry,
@@ -60,8 +61,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
         sqlParser,
         analyzer,
         streamingQueryManager,
-        queryExecution,
-        jarClassLoader) {}
+        queryExecution) {}
 
   object TestSessionState {
 
@@ -82,6 +82,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
 
       new TestSessionState(
         sparkSession.sparkContext,
+        sparkSession.sharedState,
         sqlConf,
         initHelper.experimentalMethods,
         initHelper.functionRegistry,
@@ -89,9 +90,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
         initHelper.sqlParser,
         initHelper.analyzer,
         initHelper.streamingQueryManager,
-        initHelper.queryExecutionCreator,
-        initHelper.jarClassLoader
-      )
+        initHelper.queryExecutionCreator)
     }
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 725e6ba4dedfc..3d8a607a3126a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -81,7 +81,7 @@ private[sql] class HiveSessionCatalog(
    * This should ideally override `SessionCatalog.copy` but does not at present, since
    * `HiveMetastoreCatalog` is dependent on `SparkSession`.
    */
-  def copy(
+  def clone(
       sparkSession: SparkSession,
       conf: SQLConf,
       hadoopConf: Configuration,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 354e242c51524..67c19593d8ea9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -20,13 +20,12 @@ package org.apache.spark.sql.hive
 import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
-import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner, SparkSqlParser}
+import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
-import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SessionState, SQLConf}
+import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
 import org.apache.spark.sql.streaming.StreamingQueryManager
 
 
@@ -35,6 +34,7 @@ import org.apache.spark.sql.streaming.StreamingQueryManager
  */
 private[hive] class HiveSessionState(
     sparkContext: SparkContext,
+    sharedState: SharedState,
     conf: SQLConf,
     experimentalMethods: ExperimentalMethods,
     functionRegistry: FunctionRegistry,
@@ -44,10 +44,10 @@ private[hive] class HiveSessionState(
     override val analyzer: Analyzer,
     streamingQueryManager: StreamingQueryManager,
     queryExecutionCreator: LogicalPlan => QueryExecution,
-    jarClassLoader: NonClosableMutableURLClassLoader,
     val plannerCreator: () => SparkPlanner)
   extends SessionState(
       sparkContext,
+      sharedState,
       conf,
       experimentalMethods,
       functionRegistry,
@@ -55,8 +55,7 @@ private[hive] class HiveSessionState(
       sqlParser,
       analyzer,
       streamingQueryManager,
-      queryExecutionCreator,
-      jarClassLoader) { self =>
+      queryExecutionCreator) { self =>
 
   /**
    * Planner that takes into account Hive-specific strategies.
@@ -108,11 +107,11 @@ private[hive] class HiveSessionState(
     conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
   }
 
-  override def copy(sparkSession: SparkSession): HiveSessionState = {
+  override def clone(sparkSession: SparkSession): HiveSessionState = {
     val sparkContext = sparkSession.sparkContext
-    val confCopy = conf.clone
+    val confCopy = conf.clone()
     val copyHelper = SessionState(sparkSession, Some(confCopy))
-    val catalogCopy = catalog.copy(
+    val catalogCopy = catalog.clone(
       sparkSession,
       confCopy,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
@@ -124,6 +123,7 @@ private[hive] class HiveSessionState(
 
     new HiveSessionState(
       sparkContext,
+      sparkSession.sharedState,
       confCopy,
       copyHelper.experimentalMethods,
       copyHelper.functionRegistry,
@@ -133,7 +133,6 @@ private[hive] class HiveSessionState(
       HiveSessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
       copyHelper.streamingQueryManager,
       copyHelper.queryExecutionCreator,
-      jarClassLoader,
       HiveSessionState.createPlannerCreator(
         sparkSession,
         confCopy,
@@ -158,7 +157,7 @@ object HiveSessionState {
 
     val catalog = HiveSessionCatalog(
       sparkSession,
-      SessionState.createFunctionResourceLoader(sparkContext, initHelper.jarClassLoader),
+      SessionState.createFunctionResourceLoader(sparkContext, sparkSession.sharedState),
       initHelper.functionRegistry,
       initHelper.conf,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, initHelper.conf),
@@ -179,6 +178,7 @@ object HiveSessionState {
 
     new HiveSessionState(
       sparkContext,
+      sparkSession.sharedState,
       initHelper.conf,
       initHelper.experimentalMethods,
       initHelper.functionRegistry,
@@ -188,7 +188,6 @@ object HiveSessionState {
       analyzer,
       initHelper.streamingQueryManager,
       initHelper.queryExecutionCreator,
-      initHelper.jarClassLoader,
       plannerCreator)
   }
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index f4eb44f2afa2a..c72abfda3409c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -494,6 +494,7 @@ private[hive] class TestHiveQueryExecution(
 
 private[hive] class TestHiveSessionState(
     sparkContext: SparkContext,
+    sharedState: SharedState,
     conf: SQLConf,
     experimentalMethods: ExperimentalMethods,
     functionRegistry: org.apache.spark.sql.catalyst.analysis.FunctionRegistry,
@@ -503,10 +504,10 @@ private[hive] class TestHiveSessionState(
     analyzer: Analyzer,
     streamingQueryManager: StreamingQueryManager,
     queryExecutionCreator: LogicalPlan => TestHiveQueryExecution,
-    jarClassLoader: NonClosableMutableURLClassLoader,
     plannerCreator: () => SparkPlanner)
   extends HiveSessionState(
       sparkContext,
+      sharedState,
       conf,
       experimentalMethods,
       functionRegistry,
@@ -516,7 +517,6 @@ private[hive] class TestHiveSessionState(
       analyzer,
       streamingQueryManager,
       queryExecutionCreator,
-      jarClassLoader,
       plannerCreator) {}
 
 private[hive] object TestHiveSessionState {
@@ -540,6 +540,7 @@ private[hive] object TestHiveSessionState {
 
     new TestHiveSessionState(
       sparkSession.sparkContext,
+      sparkSession.sharedState,
       sqlConf,
       initHelper.experimentalMethods,
       initHelper.functionRegistry,
@@ -549,7 +550,6 @@ private[hive] object TestHiveSessionState {
       initHelper.analyzer,
       initHelper.streamingQueryManager,
       queryExecutionCreator,
-      initHelper.jarClassLoader,
       initHelper.plannerCreator)
   }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index 6cf370e15f62f..fef94d42d5860 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -43,7 +43,7 @@ class HiveSessionCatalogSuite extends SessionCatalogSuite {
     original.createTempView("copytest1", tempTable1, overrideIfExists = false)
 
     // check if tables copied over
-    val clone = original.copy(
+    val clone = original.clone(
       sparkSession,
       new SQLConf,
       new Configuration(),

From 4f70d12dba6f89a6548ffea40879663a42902811 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 22 Feb 2017 15:59:42 -0800
Subject: [PATCH 17/31] Fix HiveSessionState clone.

---
 .../apache/spark/sql/hive/HiveSessionState.scala    | 13 ++++++-------
 .../spark/sql/hive/HiveSparkSubmitSuite.scala       |  4 +++-
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 67c19593d8ea9..edbe9df66602b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -109,12 +109,11 @@ private[hive] class HiveSessionState(
 
   override def clone(sparkSession: SparkSession): HiveSessionState = {
     val sparkContext = sparkSession.sparkContext
-    val confCopy = conf.clone()
-    val copyHelper = SessionState(sparkSession, Some(confCopy))
+    val copyHelper = super.clone(sparkSession)
     val catalogCopy = catalog.clone(
       sparkSession,
-      confCopy,
-      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
+      copyHelper.conf,
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, copyHelper.conf),
       copyHelper.functionRegistry,
       copyHelper.sqlParser)
     val hiveClient =
@@ -124,18 +123,18 @@ private[hive] class HiveSessionState(
     new HiveSessionState(
       sparkContext,
       sparkSession.sharedState,
-      confCopy,
+      copyHelper.conf,
       copyHelper.experimentalMethods,
       copyHelper.functionRegistry,
       catalogCopy,
       copyHelper.sqlParser,
       hiveClient,
-      HiveSessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
+      HiveSessionState.createAnalyzer(sparkSession, catalogCopy, copyHelper.conf),
       copyHelper.streamingQueryManager,
       copyHelper.queryExecutionCreator,
       HiveSessionState.createPlannerCreator(
         sparkSession,
-        confCopy,
+        copyHelper.conf,
         copyHelper.experimentalMethods))
   }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 06bbcbc134c27..86b030e44b679 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -218,7 +218,7 @@ class HiveSparkSubmitSuite
   }
 
   // TODO: SPARK-19540 re-enable this test
-  ignore("set hive.metastore.warehouse.dir") {
+  test("set hive.metastore.warehouse.dir") {
     // In this test, we set hive.metastore.warehouse.dir in hive-site.xml but
     // not set spark.sql.warehouse.dir. So, the warehouse dir should be
     // the value of hive.metastore.warehouse.dir. Also, the value of
@@ -238,6 +238,7 @@ class HiveSparkSubmitSuite
 
     // Write a hive-site.xml containing a setting of hive.metastore.warehouse.dir.
     val hiveSiteDir = Utils.createTempDir()
+    println(s"kunal $hiveSiteDir")
     val file = new File(hiveSiteDir.getCanonicalPath, "hive-site.xml")
     val bw = new BufferedWriter(new FileWriter(file))
     bw.write(hiveSiteXmlContent)
@@ -448,6 +449,7 @@ object SetWarehouseLocationTest extends Logging {
     val providedExpectedWarehouseLocation =
       sparkConf.getOption("spark.sql.test.expectedWarehouseDir")
 
+    println(s"kunal providedExpectedWarehouseLocation: $providedExpectedWarehouseLocation")
     val (sparkSession, expectedWarehouseLocation) = providedExpectedWarehouseLocation match {
       case Some(warehouseDir) =>
         // If spark.sql.test.expectedWarehouseDir is set, the warehouse dir is set

From dd2dedd6d578a3b5a75359be72677e61eea751e3 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Wed, 22 Feb 2017 20:10:48 -0800
Subject: [PATCH 18/31] Add tests for HiveSessionState. Review feedback.

---
 .../catalyst/analysis/FunctionRegistry.scala  |   2 +-
 .../sql/catalyst/catalog/SessionCatalog.scala |   4 +-
 .../catalog/SessionCatalogSuite.scala         |   1 +
 .../spark/sql/ExperimentalMethods.scala       |   2 +-
 .../org/apache/spark/sql/SparkSession.scala   |  10 +-
 .../sql/execution/datasources/rules.scala     |   1 +
 .../spark/sql/internal/SessionState.scala     |  19 ++-
 .../spark/sql/internal/SharedState.scala      |   5 +
 .../apache/spark/sql/SessionStateSuite.scala  | 136 ++++++++++++++++++
 .../spark/sql/SparkSessionBuilderSuite.scala  | 117 +--------------
 .../spark/sql/internal/CatalogSuite.scala     |   7 +-
 .../spark/sql/test/TestSQLContext.scala       |   4 +-
 .../spark/sql/hive/HiveSessionCatalog.scala   |   8 +-
 .../apache/spark/sql/hive/test/TestHive.scala |  41 +++++-
 .../sql/hive/HiveSessionStateSuite.scala      |  31 ++++
 .../spark/sql/hive/HiveSparkSubmitSuite.scala |   4 +-
 16 files changed, 242 insertions(+), 150 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
 create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 7ad03a67a1c63..d548d97ab4ab8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -109,7 +109,7 @@ class SimpleFunctionRegistry extends FunctionRegistry {
     functionBuilders.clear()
   }
 
-  override def clone(): FunctionRegistry = synchronized {
+  override def clone(): SimpleFunctionRegistry = synchronized {
     val registry = new SimpleFunctionRegistry
     functionBuilders.iterator.foreach { case (name, (info, builder)) =>
       registry.registerFunction(name, info, builder)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 3334faeb37cf4..0c7f5567f2717 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1185,8 +1185,8 @@ class SessionCatalog(
    * Get an identical copy of the `SessionCatalog`.
    * The temporary tables and function registry are retained.
    * The table relation cache will not be populated.
-   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep copy
-   * `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
+   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep
+   * copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
    * All arguments passed in should be associated with a particular `SparkSession`.
    */
   def clone(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index a5cd95294b5be..908e9c8f4e65f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1214,6 +1214,7 @@ class SessionCatalogSuite extends PlanTest {
     assert(clone.getTempView("copytest1") == Option(tempTable1))
 
     // check if clone and original independent
+    assert(original ne clone)
     clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
     assert(original.getTempView("copytest1") == Option(tempTable1))
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 3d846c7bbf256..bd8dd6ea3fe0f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -46,7 +46,7 @@ class ExperimentalMethods private[sql]() {
 
   @volatile var extraOptimizations: Seq[Rule[LogicalPlan]] = Nil
 
-  def copy: ExperimentalMethods = {
+  override def clone(): ExperimentalMethods = {
     val result = new ExperimentalMethods
     result.extraStrategies = extraStrategies
     result.extraOptimizations = extraOptimizations
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 5c9cc05a9d6cd..91663943125e6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -223,9 +223,9 @@ class SparkSession private(
    * :: Experimental ::
    * Create an identical copy of this `SparkSession`, sharing the underlying `SparkContext`
    * and cached data. All the state of this session (i.e. SQL configurations, temporary tables,
-   * registered functions) is also copied over.
-   * Changes to base session are not propagated to cloned session, cloned is independent
-   * after creation.
+   * registered functions) is copied over, and the cloned session is set up with the same shared
+   * state as this session. The cloned session is independent of this session, that is, any
+   * non-global change in either session is not reflected in the other.
    *
    * @note Other than the `SparkContext`, all shared state is initialized lazily.
    * This method will force the initialization of the shared state to ensure that parent
@@ -869,7 +869,6 @@ object SparkSession {
         if (options.nonEmpty) {
           logWarning("Using an existing SparkSession; some configuration may not take effect.")
         }
-        logInfo("1 using current thread's active session")
         return session
       }
 
@@ -882,7 +881,6 @@ object SparkSession {
           if (options.nonEmpty) {
             logWarning("Using an existing SparkSession; some configuration may not take effect.")
           }
-          logInfo("2 using global session")
           return session
         }
 
@@ -902,7 +900,6 @@ object SparkSession {
           if (!sc.conf.contains("spark.app.name")) {
             sc.conf.setAppName(randomAppName)
           }
-          logDebug("3 using new sparkContext")
           sc
         }
         session = new SparkSession(sparkContext)
@@ -919,7 +916,6 @@ object SparkSession {
           }
         })
       }
-      logInfo("4 using created session")
       return session
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index fda49600bfc2a..ed9e8532b79eb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -66,6 +66,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] {
  * Preprocess [[CreateTable]], to do some normalization and checking.
  */
 case class PreprocessTableCreation(sparkSession: SparkSession) extends Rule[LogicalPlan] {
+  // catalog is a def and not a val/lazy val as the latter would introduce a circular reference
   private def catalog = sparkSession.sessionState.catalog
 
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index a5f51d8b9c980..da9833613a5ce 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -50,7 +50,7 @@ private[sql] class SessionState(
     val streamingQueryManager: StreamingQueryManager,
     val queryExecutionCreator: LogicalPlan => QueryExecution) {
 
-  /*
+  /**
    * Interface exposed to the user for registering user-defined functions.
    * Note that the user-defined functions must be deterministic.
    */
@@ -59,7 +59,7 @@ private[sql] class SessionState(
   // Logical query plan optimizer.
   val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
-  /*
+  /**
    * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
    * that listen for execution metrics.
    */
@@ -88,14 +88,14 @@ private[sql] class SessionState(
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
    */
-  def clone(sparkSession: SparkSession): SessionState = {
-    val sparkContext = sparkSession.sparkContext
+  def clone(newSparkSession: SparkSession): SessionState = {
+    val sparkContext = newSparkSession.sparkContext
     val confCopy = conf.clone()
     val hadoopConfCopy = SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy)
     val functionRegistryCopy = functionRegistry.clone()
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
     val catalogCopy = catalog.clone(confCopy, hadoopConfCopy, functionRegistryCopy, sqlParser)
-    val queryExecution = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
+    val queryExecution = (plan: LogicalPlan) => new QueryExecution(newSparkSession, plan)
 
     sparkContext.getConf.getAll.foreach { case (k, v) =>
       confCopy.setConfString(k, v)
@@ -103,14 +103,14 @@ private[sql] class SessionState(
 
     new SessionState(
       sparkContext,
-      sparkSession.sharedState,
+      newSparkSession.sharedState,
       confCopy,
-      experimentalMethods.copy,
+      experimentalMethods.clone(),
       functionRegistryCopy,
       catalogCopy,
       sqlParser,
-      SessionState.createAnalyzer(sparkSession, catalogCopy, confCopy),
-      new StreamingQueryManager(sparkSession),
+      SessionState.createAnalyzer(newSparkSession, catalogCopy, confCopy),
+      new StreamingQueryManager(newSparkSession),
       queryExecution)
   }
 
@@ -125,7 +125,6 @@ private[sql] class SessionState(
   }
 
   def addJar(path: String): Unit = sharedState.addJar(path)
-
 }
 
 
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 6488591612cb7..b7a1923215d4c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -140,6 +140,11 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
     SparkSession.sqlListener.get()
   }
 
+  /*
+   * This belongs here more than in `SessionState`. However, does not seem that it can be
+   * removed from `SessionState` and `HiveSessionState` without using reflection in
+   * `AddJarCommand`.
+   */
   def addJar(path: String): Unit = {
     sparkContext.addJar(path)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
new file mode 100644
index 0000000000000..6a7eca109303b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
+
+class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
+
+  protected var activeSession: SparkSession = _
+
+  protected def createSession(): Unit = {
+    activeSession = SparkSession.builder().master("local").getOrCreate()
+  }
+
+  override def beforeEach(): Unit = {
+    createSession()
+  }
+
+  override def afterEach(): Unit = {
+    activeSession.stop()
+  }
+
+  test("fork new session and inherit a copy of the session state") {
+    val forkedSession = activeSession.cloneSession()
+
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState ne activeSession.sessionState)
+    assert(forkedSession.conf ne activeSession.conf)
+    // the rest of copying is tested individually for each field
+  }
+
+  test("fork new session and inherit RuntimeConfig options") {
+    val key = "spark-config-clone"
+    activeSession.conf.set(key, "active")
+
+    // inheritance
+    val forkedSession = activeSession.cloneSession()
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.conf ne activeSession.conf)
+    assert(forkedSession.conf.get(key) == "active")
+
+    // independence
+    forkedSession.conf.set(key, "forked")
+    assert(activeSession.conf.get(key) == "active")
+    activeSession.conf.set(key, "dontcopyme")
+    assert(forkedSession.conf.get(key) == "forked")
+  }
+
+  test("fork new session and inherit function registry and udf") {
+    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
+    val forkedSession = activeSession.cloneSession()
+
+    // inheritance
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState.functionRegistry ne
+      activeSession.sessionState.functionRegistry)
+    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
+
+    // independence
+    forkedSession.sessionState.functionRegistry.dropFunction("strlenScala")
+    assert(activeSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
+    activeSession.udf.register("addone", (_: Int) + 1)
+    assert(forkedSession.sessionState.functionRegistry.lookupFunction("addone").isEmpty)
+  }
+
+  test("fork new session and inherit experimental methods") {
+    object DummyRule1 extends Rule[LogicalPlan] {
+      def apply(p: LogicalPlan): LogicalPlan = p
+    }
+    object DummyRule2 extends Rule[LogicalPlan] {
+      def apply(p: LogicalPlan): LogicalPlan = p
+    }
+    val optimizations = List(DummyRule1, DummyRule2)
+
+    activeSession.experimental.extraOptimizations = optimizations
+
+    val forkedSession = activeSession.cloneSession()
+
+    // inheritance
+    assert(forkedSession ne activeSession)
+    assert(forkedSession.experimental.extraOptimizations.toSet ==
+      activeSession.experimental.extraOptimizations.toSet)
+
+    // independence
+    forkedSession.experimental.extraOptimizations = List(DummyRule2)
+    assert(activeSession.experimental.extraOptimizations == optimizations)
+    activeSession.experimental.extraOptimizations = List(DummyRule1)
+    assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
+  }
+
+  test("fork new session and run query on inherited table") {
+    def checkTableExists(sparkSession: SparkSession): Unit = {
+      QueryTest.checkAnswer(sparkSession.sql(
+        """
+          |SELECT x.str, COUNT(*)
+          |FROM df x JOIN df y ON x.str = y.str
+          |GROUP BY x.str
+        """.stripMargin),
+        Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
+    }
+
+    SparkSession.setActiveSession(activeSession)
+
+    implicit val enc = Encoders.tuple(Encoders.scalaInt, Encoders.STRING)
+    activeSession
+      .createDataset[(Int, String)](Seq(1, 2, 3).map(i => (i, i.toString)))
+      .toDF("int", "str")
+      .createOrReplaceTempView("df")
+    checkTableExists(activeSession)
+
+    val forkedSession = activeSession.cloneSession()
+    SparkSession.setActiveSession(forkedSession)
+    checkTableExists(forkedSession)
+
+    SparkSession.clearActiveSession()
+  }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
index bcd971ae818d9..386d13d07a95f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala
@@ -18,18 +18,15 @@
 package org.apache.spark.sql
 
 import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite}
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.test.SharedSQLContext
 
 /**
  * Test cases for the builder pattern of [[SparkSession]].
  */
-class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
+class SparkSessionBuilderSuite extends SparkFunSuite {
 
   private var initialSession: SparkSession = _
 
-  override lazy val sparkContext: SparkContext = {
+  private lazy val sparkContext: SparkContext = {
     initialSession = SparkSession.builder()
       .master("local")
       .config("spark.ui.enabled", value = false)
@@ -126,114 +123,4 @@ class SparkSessionBuilderSuite extends SparkFunSuite with SharedSQLContext {
       session.stop()
     }
   }
-
-  test("fork new session and inherit a copy of the session state") {
-    val activeSession = SparkSession.builder().master("local").getOrCreate()
-    val forkedSession = activeSession.cloneSession()
-
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState ne activeSession.sessionState)
-    assert(forkedSession.conf ne activeSession.conf)
-    // the rest of copying is tested individually for each field
-
-    forkedSession.stop()
-  }
-
-  test("fork new session and inherit RuntimeConfig options") {
-    val activeSession = SparkSession
-      .builder()
-      .master("local")
-      .getOrCreate()
-
-    val key = "spark-config-clone"
-    activeSession.conf.set(key, "active")
-
-    // inheritance
-    val forkedSession = activeSession.cloneSession()
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.conf ne activeSession.conf)
-    assert(forkedSession.conf.get(key) == "active")
-
-    // independence
-    forkedSession.conf.set(key, "forked")
-    assert(activeSession.conf.get(key) == "active")
-    activeSession.conf.set(key, "dontcopyme")
-    assert(forkedSession.conf.get(key) == "forked")
-
-    forkedSession.stop()
-  }
-
-  test("fork new session and inherit function registry and udf") {
-    val activeSession = SparkSession.builder().master("local").getOrCreate()
-    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
-    val forkedSession = activeSession.cloneSession()
-
-    // inheritance
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState.functionRegistry ne
-      activeSession.sessionState.functionRegistry)
-    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
-
-    // independence
-    forkedSession.sessionState.functionRegistry.dropFunction("strlenScala")
-    assert(activeSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
-    activeSession.udf.register("addone", (_: Int) + 1)
-    assert(forkedSession.sessionState.functionRegistry.lookupFunction("addone").isEmpty)
-
-    forkedSession.stop()
-  }
-
-  test("fork new session and inherit experimental methods") {
-    object DummyRule1 extends Rule[LogicalPlan] {
-      def apply(p: LogicalPlan): LogicalPlan = p
-    }
-    object DummyRule2 extends Rule[LogicalPlan] {
-      def apply(p: LogicalPlan): LogicalPlan = p
-    }
-    val optimizations = List(DummyRule1, DummyRule2)
-
-    val activeSession = SparkSession.builder().master("local").getOrCreate()
-    activeSession.experimental.extraOptimizations = optimizations
-
-    val forkedSession = activeSession.cloneSession()
-
-    // inheritance
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.experimental.extraOptimizations.toSet ==
-      activeSession.experimental.extraOptimizations.toSet)
-
-    // independence
-    forkedSession.experimental.extraOptimizations = List(DummyRule2)
-    assert(activeSession.experimental.extraOptimizations == optimizations)
-    activeSession.experimental.extraOptimizations = List(DummyRule1)
-    assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
-
-    forkedSession.stop()
-  }
-
-  test("fork new session and run query on inherited table") {
-    def checkTableExists(sparkSession: SparkSession): Unit = {
-      QueryTest.checkAnswer(sparkSession.sql(
-        """
-          |SELECT x.str, COUNT(*)
-          |FROM df x JOIN df y ON x.str = y.str
-          |GROUP BY x.str
-        """.stripMargin),
-        Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
-    }
-
-    val activeSession = SparkSession.builder().master("local").getOrCreate()
-    SparkSession.setActiveSession(activeSession)
-    import activeSession.implicits._
-
-    Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str").createOrReplaceTempView("df")
-    checkTableExists(activeSession)
-
-    val forkedSession = activeSession.cloneSession()
-    SparkSession.setActiveSession(forkedSession)
-    checkTableExists(forkedSession)
-
-    SparkSession.clearActiveSession()
-    forkedSession.stop()
-  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index fc481e45c426f..4e29c8a3fa37b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -493,7 +493,7 @@ class CatalogSuite
     }
   }
 
-  test("clone SessionCatalog") {
+  test("clone Catalog") {
     // need to test tempTables are cloned
     assert(spark.catalog.listTables().collect().isEmpty)
 
@@ -503,9 +503,12 @@ class CatalogSuite
     val forkedSession = spark.cloneSession()
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
-    dropTable("my_temp_table")
+    dropTable("my_temp_table") // drop table in original session
     assert(spark.catalog.listTables().collect().map(_.name).toSet == Set())
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
+    forkedSession.sessionState.catalog
+      .createTempView("fork_table", Range(1, 2, 3, 4), overrideIfExists = true)
+    assert(spark.catalog.listTables().collect().map(_.name).toSet == Set())
   }
 
   // TODO: add tests for the rest of them
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index 0bcfbb110c1e2..1a10837663c3d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -65,7 +65,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
 
   object TestSessionState {
 
-    def createTestConf: SQLConf = {
+    def testConf: SQLConf = {
       new SQLConf {
         clear()
         override def clear(): Unit = {
@@ -77,7 +77,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
     }
 
     def apply(sparkSession: SparkSession): TestSessionState = {
-      val sqlConf = createTestConf
+      val sqlConf = testConf
       val initHelper = SessionState(sparkSession, Some(sqlConf))
 
       new TestSessionState(
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 3d8a607a3126a..28366b5cf5b61 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -72,13 +72,13 @@ private[sql] class HiveSessionCatalog(
   }
 
   /**
-   * Get an identical copy of the `SessionCatalog`.
+   * Get an identical copy of the `HiveSessionCatalog`.
    * The temporary tables and function registry are retained.
    * The table relation cache will not be populated.
-   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep copy
-   * `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
+   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need
+   * deep copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
    * All arguments passed in should be associated with `sparkSession`.
-   * This should ideally override `SessionCatalog.copy` but does not at present, since
+   * This should ideally override `SessionCatalog.clone()` but does not at present, since
    * `HiveMetastoreCatalog` is dependent on `SparkSession`.
    */
   def clone(
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index c72abfda3409c..bb860d05c94a1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner}
 import org.apache.spark.sql.execution.command.CacheTableCommand
 import org.apache.spark.sql.hive._
 import org.apache.spark.sql.hive.client.HiveClient
-import org.apache.spark.sql.internal.{NonClosableMutableURLClassLoader, SharedState, SQLConf}
+import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
 import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
 import org.apache.spark.sql.streaming.StreamingQueryManager
 import org.apache.spark.util.{ShutdownHookManager, Utils}
@@ -116,9 +116,14 @@ class TestHiveContext(
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
+    @transient private val parentSessionState: Option[SessionState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
+  def this(sc: SparkContext, existingSharedState: Option[SharedState], loadTestTables: Boolean) {
+    this(sc, existingSharedState, None, loadTestTables)
+  }
+
   def this(sc: SparkContext, loadTestTables: Boolean) {
     this(
       sc,
@@ -148,12 +153,24 @@ private[hive] class TestHiveSparkSession(
   // TODO: Let's remove TestHiveSessionState. Otherwise, we are not really testing the reflection
   // logic based on the setting of CATALOG_IMPLEMENTATION.
   @transient
-  override lazy val sessionState: TestHiveSessionState = TestHiveSessionState(self)
+  override lazy val sessionState: TestHiveSessionState = {
+    parentSessionState
+      .map(_.clone(this))
+      .getOrElse(TestHiveSessionState(self))
+      .asInstanceOf[TestHiveSessionState]
+  }
 
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
   }
 
+  override def cloneSession(): TestHiveSparkSession = {
+    val clonedSession =
+      new TestHiveSparkSession(sc, Some(sharedState), Some(sessionState), loadTestTables)
+    clonedSession.sessionState // force initialization
+    clonedSession
+  }
+
   private var cacheTables: Boolean = false
 
   def setCacheTables(c: Boolean): Unit = {
@@ -517,7 +534,25 @@ private[hive] class TestHiveSessionState(
       analyzer,
       streamingQueryManager,
       queryExecutionCreator,
-      plannerCreator) {}
+      plannerCreator) {
+
+  override def clone(newSparkSession: SparkSession): TestHiveSessionState = {
+    val copyHelper = super.clone(newSparkSession)
+    new TestHiveSessionState(
+      sparkContext,
+      sharedState,
+      copyHelper.conf,
+      copyHelper.experimentalMethods,
+      copyHelper.functionRegistry,
+      copyHelper.catalog,
+      copyHelper.sqlParser,
+      copyHelper.metadataHive,
+      copyHelper.analyzer,
+      copyHelper.streamingQueryManager,
+      queryExecutionCreator,
+      plannerCreator)
+  }
+}
 
 private[hive] object TestHiveSessionState {
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
new file mode 100644
index 0000000000000..553fc20a9d2cc
--- /dev/null
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.hive
+
+import org.apache.spark.sql.SessionStateSuite
+import org.apache.spark.sql.hive.test.TestHiveSingleton
+
+class HiveSessionStateSuite extends SessionStateSuite with TestHiveSingleton {
+
+  override def createSession(): Unit = {
+    activeSession = hiveContext.newSession().sparkSession
+  }
+
+  override def afterEach(): Unit = {}
+
+}
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 86b030e44b679..06bbcbc134c27 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -218,7 +218,7 @@ class HiveSparkSubmitSuite
   }
 
   // TODO: SPARK-19540 re-enable this test
-  test("set hive.metastore.warehouse.dir") {
+  ignore("set hive.metastore.warehouse.dir") {
     // In this test, we set hive.metastore.warehouse.dir in hive-site.xml but
     // not set spark.sql.warehouse.dir. So, the warehouse dir should be
     // the value of hive.metastore.warehouse.dir. Also, the value of
@@ -238,7 +238,6 @@ class HiveSparkSubmitSuite
 
     // Write a hive-site.xml containing a setting of hive.metastore.warehouse.dir.
     val hiveSiteDir = Utils.createTempDir()
-    println(s"kunal $hiveSiteDir")
     val file = new File(hiveSiteDir.getCanonicalPath, "hive-site.xml")
     val bw = new BufferedWriter(new FileWriter(file))
     bw.write(hiveSiteXmlContent)
@@ -449,7 +448,6 @@ object SetWarehouseLocationTest extends Logging {
     val providedExpectedWarehouseLocation =
       sparkConf.getOption("spark.sql.test.expectedWarehouseDir")
 
-    println(s"kunal providedExpectedWarehouseLocation: $providedExpectedWarehouseLocation")
     val (sparkSession, expectedWarehouseLocation) = providedExpectedWarehouseLocation match {
       case Some(warehouseDir) =>
         // If spark.sql.test.expectedWarehouseDir is set, the warehouse dir is set

From 8a8d47b517a308b3fd79c226865cc7e78042575c Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 23 Feb 2017 11:14:47 -0800
Subject: [PATCH 19/31] Simplify TestSQLContext. Review feedback.

---
 .../catalog/SessionCatalogSuite.scala         |  2 +-
 .../apache/spark/sql/SessionStateSuite.scala  |  1 +
 .../spark/sql/internal/CatalogSuite.scala     |  3 +
 .../spark/sql/test/TestSQLContext.scala       | 64 +++----------------
 .../spark/sql/hive/HiveSessionCatalog.scala   |  4 +-
 .../spark/sql/hive/HiveSessionState.scala     | 16 ++---
 6 files changed, 24 insertions(+), 66 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 908e9c8f4e65f..f7a9db2c53eaf 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1211,10 +1211,10 @@ class SessionCatalogSuite extends PlanTest {
       new Configuration(),
       new SimpleFunctionRegistry,
       CatalystSqlParser)
+    assert(original ne clone)
     assert(clone.getTempView("copytest1") == Option(tempTable1))
 
     // check if clone and original independent
-    assert(original ne clone)
     clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
     assert(original.getTempView("copytest1") == Option(tempTable1))
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index 6a7eca109303b..4499e99397577 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -128,6 +128,7 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     checkTableExists(activeSession)
 
     val forkedSession = activeSession.cloneSession()
+    assert(forkedSession ne activeSession)
     SparkSession.setActiveSession(forkedSession)
     checkTableExists(forkedSession)
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index 4e29c8a3fa37b..2f02eb72709e0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -500,9 +500,12 @@ class CatalogSuite
     createTempTable("my_temp_table")
     assert(spark.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
+    // inheritance
     val forkedSession = spark.cloneSession()
+    assert(spark ne forkedSession)
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
+    // independence
     dropTable("my_temp_table") // drop table in original session
     assert(spark.catalog.listTables().collect().map(_.name).toSet == Set())
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index 1a10837663c3d..f73b7c58b7820 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -40,62 +40,16 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
     this(new SparkConf)
   }
 
-  class TestSessionState(
-      sparkContext: SparkContext,
-      sharedState: SharedState,
-      conf: SQLConf,
-      experimentalMethods: ExperimentalMethods,
-      functionRegistry: FunctionRegistry,
-      catalog: SessionCatalog,
-      sqlParser: ParserInterface,
-      analyzer: Analyzer,
-      streamingQueryManager: StreamingQueryManager,
-      queryExecution: LogicalPlan => QueryExecution)
-    extends SessionState(
-        sparkContext,
-        sharedState,
-        conf,
-        experimentalMethods,
-        functionRegistry,
-        catalog,
-        sqlParser,
-        analyzer,
-        streamingQueryManager,
-        queryExecution) {}
-
-  object TestSessionState {
-
-    def testConf: SQLConf = {
-      new SQLConf {
-        clear()
-        override def clear(): Unit = {
-          super.clear()
-          // Make sure we start with the default test configs even after clear
-          TestSQLContext.overrideConfs.foreach { case (key, value) => setConfString(key, value) }
-        }
-      }
-    }
-
-    def apply(sparkSession: SparkSession): TestSessionState = {
-      val sqlConf = testConf
-      val initHelper = SessionState(sparkSession, Some(sqlConf))
-
-      new TestSessionState(
-        sparkSession.sparkContext,
-        sparkSession.sharedState,
-        sqlConf,
-        initHelper.experimentalMethods,
-        initHelper.functionRegistry,
-        initHelper.catalog,
-        initHelper.sqlParser,
-        initHelper.analyzer,
-        initHelper.streamingQueryManager,
-        initHelper.queryExecutionCreator)
-    }
-  }
-
   @transient
-  override lazy val sessionState: SessionState = TestSessionState(this)
+  override lazy val sessionState: SessionState = SessionState(this, Some(
+    new SQLConf {
+      clear()
+      override def clear(): Unit = {
+        super.clear()
+        // Make sure we start with the default test configs even after clear
+        TestSQLContext.overrideConfs.foreach { case (key, value) => setConfString(key, value) }
+      }
+    }))
 
   // Needed for Java tests
   def loadTestData(): Unit = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 28366b5cf5b61..5222e1b9659c6 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -82,14 +82,14 @@ private[sql] class HiveSessionCatalog(
    * `HiveMetastoreCatalog` is dependent on `SparkSession`.
    */
   def clone(
-      sparkSession: SparkSession,
+      newSparkSession: SparkSession,
       conf: SQLConf,
       hadoopConf: Configuration,
       functionRegistry: FunctionRegistry,
       parser: ParserInterface): HiveSessionCatalog = {
 
     val catalog = HiveSessionCatalog(
-      sparkSession,
+      newSparkSession,
       functionResourceLoader,
       functionRegistry,
       conf,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index edbe9df66602b..10a3102e7a7ee 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -107,33 +107,33 @@ private[hive] class HiveSessionState(
     conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
   }
 
-  override def clone(sparkSession: SparkSession): HiveSessionState = {
-    val sparkContext = sparkSession.sparkContext
-    val copyHelper = super.clone(sparkSession)
+  override def clone(newSparkSession: SparkSession): HiveSessionState = {
+    val sparkContext = newSparkSession.sparkContext
+    val copyHelper = super.clone(newSparkSession)
     val catalogCopy = catalog.clone(
-      sparkSession,
+      newSparkSession,
       copyHelper.conf,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, copyHelper.conf),
       copyHelper.functionRegistry,
       copyHelper.sqlParser)
     val hiveClient =
-      sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
+      newSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
     new HiveSessionState(
       sparkContext,
-      sparkSession.sharedState,
+      newSparkSession.sharedState,
       copyHelper.conf,
       copyHelper.experimentalMethods,
       copyHelper.functionRegistry,
       catalogCopy,
       copyHelper.sqlParser,
       hiveClient,
-      HiveSessionState.createAnalyzer(sparkSession, catalogCopy, copyHelper.conf),
+      HiveSessionState.createAnalyzer(newSparkSession, catalogCopy, copyHelper.conf),
       copyHelper.streamingQueryManager,
       copyHelper.queryExecutionCreator,
       HiveSessionState.createPlannerCreator(
-        sparkSession,
+        newSparkSession,
         copyHelper.conf,
         copyHelper.experimentalMethods))
   }

From ffc205826de95c9a467bc68509f9915137440799 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 23 Feb 2017 17:28:21 -0800
Subject: [PATCH 20/31] (attempt to) Fix tests.

---
 .../apache/spark/sql/SessionStateSuite.scala  |   4 +-
 .../spark/sql/hive/HiveMetastoreCatalog.scala |   1 +
 .../sql/hive/client/HiveClientImpl.scala      |   1 +
 .../apache/spark/sql/hive/test/TestHive.scala | 135 +++++-------------
 .../sql/hive/HiveSessionStateSuite.scala      |  18 ++-
 .../spark/sql/hive/HiveSparkSubmitSuite.scala |   6 +-
 6 files changed, 54 insertions(+), 111 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index 4499e99397577..efde669ee43cb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -107,7 +107,7 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
   }
 
-  test("fork new session and run query on inherited table") {
+  test("fork new sessions and run query on inherited table") {
     def checkTableExists(sparkSession: SparkSession): Unit = {
       QueryTest.checkAnswer(sparkSession.sql(
         """
@@ -131,6 +131,8 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     assert(forkedSession ne activeSession)
     SparkSession.setActiveSession(forkedSession)
     checkTableExists(forkedSession)
+    checkTableExists(activeSession.cloneSession())
+    checkTableExists(forkedSession.cloneSession())
 
     SparkSession.clearActiveSession()
   }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 3963a96bc599d..ea52c961cb74a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -38,6 +38,7 @@ import org.apache.spark.sql.types._
  * cleaned up to integrate more nicely with [[HiveExternalCatalog]].
  */
 private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Logging {
+  // these are def_s and not val/lazy val since the latter would introduce circular references
   private def sessionState = sparkSession.sessionState.asInstanceOf[HiveSessionState]
   private def tableRelationCache = sparkSession.sessionState.catalog.tableRelationCache
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index dc9c3ff33542d..7d793dd0d33b0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -274,6 +274,7 @@ private[hive] class HiveClientImpl(
     val original = Thread.currentThread().getContextClassLoader
     // Set the thread local metastore client to the client associated with this HiveClientImpl.
     Hive.set(client)
+    Hive.get(conf)
     // The classloader in clientLoader could be changed after addJar, always use the latest
     // classloader
     state.getConf.setClassLoader(clientLoader.classLoader)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index bb860d05c94a1..48cbea3a68d94 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -85,7 +85,7 @@ class TestHiveContext(
     new TestHiveContext(sparkSession.newSession())
   }
 
-  override def sessionState: TestHiveSessionState = sparkSession.sessionState
+  override def sessionState: HiveSessionState = sparkSession.sessionState
 
   def setCacheTables(c: Boolean): Unit = {
     sparkSession.setCacheTables(c)
@@ -116,14 +116,9 @@ class TestHiveContext(
 private[hive] class TestHiveSparkSession(
     @transient private val sc: SparkContext,
     @transient private val existingSharedState: Option[SharedState],
-    @transient private val parentSessionState: Option[SessionState],
     private val loadTestTables: Boolean)
   extends SparkSession(sc) with Logging { self =>
 
-  def this(sc: SparkContext, existingSharedState: Option[SharedState], loadTestTables: Boolean) {
-    this(sc, existingSharedState, None, loadTestTables)
-  }
-
   def this(sc: SparkContext, loadTestTables: Boolean) {
     this(
       sc,
@@ -150,27 +145,44 @@ private[hive] class TestHiveSparkSession(
     existingSharedState.getOrElse(new SharedState(sc))
   }
 
-  // TODO: Let's remove TestHiveSessionState. Otherwise, we are not really testing the reflection
-  // logic based on the setting of CATALOG_IMPLEMENTATION.
-  @transient
-  override lazy val sessionState: TestHiveSessionState = {
-    parentSessionState
-      .map(_.clone(this))
-      .getOrElse(TestHiveSessionState(self))
-      .asInstanceOf[TestHiveSessionState]
+  private def createHiveSessionState: HiveSessionState = {
+    val testConf =
+      new SQLConf {
+        clear()
+        override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
+        override def clear(): Unit = {
+          super.clear()
+          TestHiveContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
+        }
+      }
+    val queryExecutionCreator = (plan: LogicalPlan) => new TestHiveQueryExecution(this, plan)
+    val initHelper = HiveSessionState(this, Some(testConf))
+    sparkContext.getConf.getAll.foreach { case (k, v) =>
+      testConf.setConfString(k, v)
+    }
+
+    new HiveSessionState(
+      sparkContext,
+      sharedState,
+      testConf,
+      initHelper.experimentalMethods,
+      initHelper.functionRegistry,
+      initHelper.catalog,
+      initHelper.sqlParser,
+      initHelper.metadataHive,
+      initHelper.analyzer,
+      initHelper.streamingQueryManager,
+      queryExecutionCreator,
+      initHelper.plannerCreator)
   }
 
+  @transient
+  override lazy val sessionState: HiveSessionState = createHiveSessionState
+
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
   }
 
-  override def cloneSession(): TestHiveSparkSession = {
-    val clonedSession =
-      new TestHiveSparkSession(sc, Some(sharedState), Some(sessionState), loadTestTables)
-    clonedSession.sessionState // force initialization
-    clonedSession
-  }
-
   private var cacheTables: Boolean = false
 
   def setCacheTables(c: Boolean): Unit = {
@@ -509,87 +521,6 @@ private[hive] class TestHiveQueryExecution(
   }
 }
 
-private[hive] class TestHiveSessionState(
-    sparkContext: SparkContext,
-    sharedState: SharedState,
-    conf: SQLConf,
-    experimentalMethods: ExperimentalMethods,
-    functionRegistry: org.apache.spark.sql.catalyst.analysis.FunctionRegistry,
-    catalog: HiveSessionCatalog,
-    sqlParser: ParserInterface,
-    metadataHive: HiveClient,
-    analyzer: Analyzer,
-    streamingQueryManager: StreamingQueryManager,
-    queryExecutionCreator: LogicalPlan => TestHiveQueryExecution,
-    plannerCreator: () => SparkPlanner)
-  extends HiveSessionState(
-      sparkContext,
-      sharedState,
-      conf,
-      experimentalMethods,
-      functionRegistry,
-      catalog,
-      sqlParser,
-      metadataHive,
-      analyzer,
-      streamingQueryManager,
-      queryExecutionCreator,
-      plannerCreator) {
-
-  override def clone(newSparkSession: SparkSession): TestHiveSessionState = {
-    val copyHelper = super.clone(newSparkSession)
-    new TestHiveSessionState(
-      sparkContext,
-      sharedState,
-      copyHelper.conf,
-      copyHelper.experimentalMethods,
-      copyHelper.functionRegistry,
-      copyHelper.catalog,
-      copyHelper.sqlParser,
-      copyHelper.metadataHive,
-      copyHelper.analyzer,
-      copyHelper.streamingQueryManager,
-      queryExecutionCreator,
-      plannerCreator)
-  }
-}
-
-private[hive] object TestHiveSessionState {
-
-  def createTestConf: SQLConf = {
-    new SQLConf {
-      clear()
-      override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false)
-      override def clear(): Unit = {
-        super.clear()
-        TestHiveContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
-      }
-    }
-  }
-
-  def apply(sparkSession: TestHiveSparkSession): TestHiveSessionState = {
-    val sqlConf = createTestConf
-    val initHelper = HiveSessionState(sparkSession, Some(sqlConf))
-    val queryExecutionCreator = (plan: LogicalPlan) =>
-      new TestHiveQueryExecution(sparkSession, plan)
-
-    new TestHiveSessionState(
-      sparkSession.sparkContext,
-      sparkSession.sharedState,
-      sqlConf,
-      initHelper.experimentalMethods,
-      initHelper.functionRegistry,
-      initHelper.catalog,
-      initHelper.sqlParser,
-      initHelper.metadataHive,
-      initHelper.analyzer,
-      initHelper.streamingQueryManager,
-      queryExecutionCreator,
-      initHelper.plannerCreator)
-  }
-
-}
-
 
 private[hive] object TestHiveContext {
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index 553fc20a9d2cc..ce51c2b1ef698 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -17,15 +17,25 @@
 
 package org.apache.spark.sql.hive
 
-import org.apache.spark.sql.SessionStateSuite
+import org.scalatest.BeforeAndAfterEach
+
+import org.apache.spark.sql._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
-class HiveSessionStateSuite extends SessionStateSuite with TestHiveSingleton {
+class HiveSessionStateSuite
+  extends SessionStateSuite
+  with TestHiveSingleton
+  with BeforeAndAfterEach {
 
-  override def createSession(): Unit = {
-    activeSession = hiveContext.newSession().sparkSession
+
+  override def beforeEach(): Unit = {
+    createSession()
   }
 
   override def afterEach(): Unit = {}
 
+  override def createSession(): Unit = {
+    activeSession = hiveContext.sparkSession
+  }
+
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 06bbcbc134c27..8f0d5d886c9d5 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -217,8 +217,7 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
-  // TODO: SPARK-19540 re-enable this test
-  ignore("set hive.metastore.warehouse.dir") {
+  test("set hive.metastore.warehouse.dir") {
     // In this test, we set hive.metastore.warehouse.dir in hive-site.xml but
     // not set spark.sql.warehouse.dir. So, the warehouse dir should be
     // the value of hive.metastore.warehouse.dir. Also, the value of
@@ -298,8 +297,7 @@ class HiveSparkSubmitSuite
     runSparkSubmit(args)
   }
 
-  // TODO: SPARK-19540 re-enable this test
-  ignore("SPARK-18360: default table path of tables in default database should depend on the " +
+  test("SPARK-18360: default table path of tables in default database should depend on the " +
     "location of default database") {
     val unusedJar = TestUtils.createJarWithClasses(Seq.empty)
     val args = Seq(

From 16824f916e87fd90706f9dfd7b7dd81d87b732dd Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Thu, 23 Feb 2017 18:12:23 -0800
Subject: [PATCH 21/31] Review.

---
 .../src/main/scala/org/apache/spark/sql/SparkSession.scala | 2 +-
 .../org/apache/spark/sql/hive/client/HiveClientImpl.scala  | 1 +
 .../apache/spark/sql/hive/HiveSessionCatalogSuite.scala    | 1 +
 .../org/apache/spark/sql/hive/HiveSessionStateSuite.scala  | 7 ++-----
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 91663943125e6..8b4fa669158e4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -1004,7 +1004,7 @@ object SparkSession {
 
   /**
    * Helper method to create an instance of `SessionState` based on `className` from conf.
-   * The result is either `SessionState` or `HiveSessionState`
+   * The result is either `SessionState` or `HiveSessionState`.
    */
   private def instantiateSessionState(
       className: String,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 7d793dd0d33b0..dd68893b63129 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -274,6 +274,7 @@ private[hive] class HiveClientImpl(
     val original = Thread.currentThread().getContextClassLoader
     // Set the thread local metastore client to the client associated with this HiveClientImpl.
     Hive.set(client)
+    // replace conf in cached hive with current conf
     Hive.get(conf)
     // The classloader in clientLoader could be changed after addJar, always use the latest
     // classloader
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index fef94d42d5860..c217ec241ce77 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -49,6 +49,7 @@ class HiveSessionCatalogSuite extends SessionCatalogSuite {
       new Configuration(),
       new SimpleFunctionRegistry,
       CatalystSqlParser)
+    assert(original ne clone)
     assert(clone.getTempView("copytest1") == Option(tempTable1))
 
     // check if clone and original independent
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index ce51c2b1ef698..7ac91aae5f92b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -22,11 +22,8 @@ import org.scalatest.BeforeAndAfterEach
 import org.apache.spark.sql._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
-class HiveSessionStateSuite
-  extends SessionStateSuite
-  with TestHiveSingleton
-  with BeforeAndAfterEach {
-
+class HiveSessionStateSuite extends SessionStateSuite
+  with TestHiveSingleton with BeforeAndAfterEach {
 
   override def beforeEach(): Unit = {
     createSession()

From fd11ee2289ae26b3061659dc26b1f09ded32d039 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Fri, 24 Feb 2017 12:01:12 -0800
Subject: [PATCH 22/31] Update test case.

---
 .../scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index 7ac91aae5f92b..f5f3ce5aada06 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -32,7 +32,7 @@ class HiveSessionStateSuite extends SessionStateSuite
   override def afterEach(): Unit = {}
 
   override def createSession(): Unit = {
-    activeSession = hiveContext.sparkSession
+    activeSession = spark.newSession()
   }
 
 }

From 437b0bca7bc29809083f26b8a4848d53d999d097 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Fri, 24 Feb 2017 16:02:27 -0800
Subject: [PATCH 23/31] Add throwing exception if wrong SessionState clone is
 called. Update tests.

---
 .../spark/sql/internal/SessionState.scala     | 32 +++++++-------
 .../apache/spark/sql/SessionStateSuite.scala  | 15 ++-----
 .../spark/sql/internal/CatalogSuite.scala     |  1 +
 .../sql/internal/SQLConfEntrySuite.scala      |  1 +
 .../spark/sql/hive/HiveSessionCatalog.scala   | 11 ++++-
 .../spark/sql/hive/HiveSessionState.scala     | 42 ++++++++++++-------
 .../apache/spark/sql/hive/test/TestHive.scala |  4 +-
 .../sql/hive/HiveSessionCatalogSuite.scala    | 14 ++-----
 8 files changed, 65 insertions(+), 55 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index da9833613a5ce..6c397e29f97c8 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -17,12 +17,9 @@
 
 package org.apache.spark.sql.internal
 
-import java.io.File
-
 import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.Path
 
-import org.apache.spark.SparkContext
+import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.catalog._
@@ -91,15 +88,16 @@ private[sql] class SessionState(
   def clone(newSparkSession: SparkSession): SessionState = {
     val sparkContext = newSparkSession.sparkContext
     val confCopy = conf.clone()
-    val hadoopConfCopy = SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy)
     val functionRegistryCopy = functionRegistry.clone()
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
-    val catalogCopy = catalog.clone(confCopy, hadoopConfCopy, functionRegistryCopy, sqlParser)
-    val queryExecution = (plan: LogicalPlan) => new QueryExecution(newSparkSession, plan)
+    val catalogCopy = catalog.clone(
+      confCopy,
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
+      functionRegistryCopy,
+      sqlParser)
+    val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(newSparkSession, plan)
 
-    sparkContext.getConf.getAll.foreach { case (k, v) =>
-      confCopy.setConfString(k, v)
-    }
+    SessionState.mergeSparkConf(confCopy, sparkContext.getConf)
 
     new SessionState(
       sparkContext,
@@ -111,7 +109,7 @@ private[sql] class SessionState(
       sqlParser,
       SessionState.createAnalyzer(newSparkSession, catalogCopy, confCopy),
       new StreamingQueryManager(newSparkSession),
-      queryExecution)
+      queryExecutionCreator)
   }
 
   // ------------------------------------------------------
@@ -144,9 +142,7 @@ object SessionState {
     val sqlConf = conf.getOrElse(new SQLConf)
 
     // Automatically extract all entries and put them in our SQLConf
-    sparkContext.getConf.getAll.foreach { case (k, v) =>
-      sqlConf.setConfString(k, v)
-    }
+    mergeSparkConf(sqlConf, sparkContext.getConf)
 
     // Internal catalog for managing functions registered by the user.
     val functionRegistry = FunctionRegistry.builtin.clone()
@@ -232,4 +228,12 @@ object SessionState {
     }
   }
 
+  /**
+   * Extract entries from `SparkConf` and put them in the `SQLConf`
+   */
+  def mergeSparkConf(sqlConf: SQLConf, sparkConf: SparkConf): Unit = {
+    sparkConf.getAll.foreach { case (k, v) =>
+      sqlConf.setConfString(k, v)
+    }
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index efde669ee43cb..aec99be391908 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -39,15 +39,6 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     activeSession.stop()
   }
 
-  test("fork new session and inherit a copy of the session state") {
-    val forkedSession = activeSession.cloneSession()
-
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState ne activeSession.sessionState)
-    assert(forkedSession.conf ne activeSession.conf)
-    // the rest of copying is tested individually for each field
-  }
-
   test("fork new session and inherit RuntimeConfig options") {
     val key = "spark-config-clone"
     activeSession.conf.set(key, "active")
@@ -97,6 +88,7 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
 
     // inheritance
     assert(forkedSession ne activeSession)
+    assert(forkedSession.experimental ne activeSession.experimental)
     assert(forkedSession.experimental.extraOptimizations.toSet ==
       activeSession.experimental.extraOptimizations.toSet)
 
@@ -129,10 +121,11 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
 
     val forkedSession = activeSession.cloneSession()
     assert(forkedSession ne activeSession)
+    assert(forkedSession.sessionState ne activeSession.sessionState)
     SparkSession.setActiveSession(forkedSession)
     checkTableExists(forkedSession)
-    checkTableExists(activeSession.cloneSession())
-    checkTableExists(forkedSession.cloneSession())
+    checkTableExists(activeSession.cloneSession()) // ability to clone multiple times
+    checkTableExists(forkedSession.cloneSession()) // clone of clone
 
     SparkSession.clearActiveSession()
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index 2f02eb72709e0..d14e0634d17eb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -503,6 +503,7 @@ class CatalogSuite
     // inheritance
     val forkedSession = spark.cloneSession()
     assert(spark ne forkedSession)
+    assert(spark.catalog ne forkedSession.catalog)
     assert(forkedSession.catalog.listTables().collect().map(_.name).toSet == Set("my_temp_table"))
 
     // independence
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
index 16273da754ee9..f2456c7704064 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala
@@ -196,6 +196,7 @@ class SQLConfEntrySuite extends SparkFunSuite {
     // inheritance
     original.setConfString(key, "orig")
     val clone = original.clone()
+    assert(original ne clone)
     assert(clone.getConfString(key, "noentry") === "orig")
 
     // independence
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 5222e1b9659c6..fa08643384080 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry}
 import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF, GenericUDTF}
 
 import org.apache.spark.sql.{AnalysisException, SparkSession}
-import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
+import org.apache.spark.sql.catalyst.{CatalystConf, FunctionIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
 import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, GlobalTempViewManager, SessionCatalog}
@@ -77,7 +77,7 @@ private[sql] class HiveSessionCatalog(
    * The table relation cache will not be populated.
    * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need
    * deep copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
-   * All arguments passed in should be associated with `sparkSession`.
+   * All arguments passed in should be associated with `newSparkSession`.
    * This should ideally override `SessionCatalog.clone()` but does not at present, since
    * `HiveMetastoreCatalog` is dependent on `SparkSession`.
    */
@@ -105,6 +105,13 @@ private[sql] class HiveSessionCatalog(
     catalog
   }
 
+  override def clone(
+    conf: CatalystConf,
+    hadoopConf: Configuration,
+    functionRegistry: FunctionRegistry,
+    parser: ParserInterface): HiveSessionCatalog = throw new UnsupportedOperationException(
+    "to clone HiveSessionCatalog, use the other clone method that also accepts a SparkSession")
+
   // For testing only
   private[hive] def getCachedDataSourceTable(table: TableIdentifier): LogicalPlan = {
     val key = metastoreCatalog.getQualifiedTableName(table)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 10a3102e7a7ee..39d24a99e4595 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner}
+import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner, SparkSqlParser}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
@@ -107,35 +107,47 @@ private[hive] class HiveSessionState(
     conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
   }
 
+  /**
+   * Get an identical copy of the `HiveSessionState`.
+   * This should ideally reuse the `SessionState.clone` but cannot do so.
+   * Doing that will throw an exception when trying to clone the catalog.
+   */
   override def clone(newSparkSession: SparkSession): HiveSessionState = {
     val sparkContext = newSparkSession.sparkContext
-    val copyHelper = super.clone(newSparkSession)
+    val confCopy = conf.clone()
+    val functionRegistryCopy = functionRegistry.clone()
+    val experimentalMethodsCopy = experimentalMethods.clone()
+    val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
     val catalogCopy = catalog.clone(
       newSparkSession,
-      copyHelper.conf,
-      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, copyHelper.conf),
-      copyHelper.functionRegistry,
-      copyHelper.sqlParser)
+      confCopy,
+      SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
+      functionRegistryCopy,
+      sqlParser)
+    val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(newSparkSession, plan)
+
     val hiveClient =
       newSparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
+    SessionState.mergeSparkConf(confCopy, sparkContext.getConf)
+
     new HiveSessionState(
       sparkContext,
       newSparkSession.sharedState,
-      copyHelper.conf,
-      copyHelper.experimentalMethods,
-      copyHelper.functionRegistry,
+      confCopy,
+      experimentalMethodsCopy,
+      functionRegistryCopy,
       catalogCopy,
-      copyHelper.sqlParser,
+      sqlParser,
       hiveClient,
-      HiveSessionState.createAnalyzer(newSparkSession, catalogCopy, copyHelper.conf),
-      copyHelper.streamingQueryManager,
-      copyHelper.queryExecutionCreator,
+      HiveSessionState.createAnalyzer(newSparkSession, catalogCopy, confCopy),
+      new StreamingQueryManager(newSparkSession),
+      queryExecutionCreator,
       HiveSessionState.createPlannerCreator(
         newSparkSession,
-        copyHelper.conf,
-        copyHelper.experimentalMethods))
+        confCopy,
+        experimentalMethodsCopy))
   }
 
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 48cbea3a68d94..d4c9dc0a611bf 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -157,9 +157,7 @@ private[hive] class TestHiveSparkSession(
       }
     val queryExecutionCreator = (plan: LogicalPlan) => new TestHiveQueryExecution(this, plan)
     val initHelper = HiveSessionState(this, Some(testConf))
-    sparkContext.getConf.getAll.foreach { case (k, v) =>
-      testConf.setConfString(k, v)
-    }
+    SessionState.mergeSparkConf(testConf, sparkContext.getConf)
 
     new HiveSessionState(
       sparkContext,
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index c217ec241ce77..7ac0774389131 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -30,21 +30,16 @@ import org.apache.spark.sql.internal.SQLConf
 class HiveSessionCatalogSuite extends SessionCatalogSuite {
 
   test("clone HiveSessionCatalog") {
-    val sparkSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
-    val original = HiveSessionCatalog(
-      sparkSession,
-      DummyFunctionResourceLoader,
-      new SimpleFunctionRegistry,
-      new SQLConf,
-      new Configuration(),
-      CatalystSqlParser)
+    val hiveSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
+    assert(hiveSession.sessionState.catalog.isInstanceOf[HiveSessionCatalog])
+    val original = hiveSession.sessionState.catalog.asInstanceOf[HiveSessionCatalog]
 
     val tempTable1 = Range(1, 10, 1, 10)
     original.createTempView("copytest1", tempTable1, overrideIfExists = false)
 
     // check if tables copied over
     val clone = original.clone(
-      sparkSession,
+      hiveSession,
       new SQLConf,
       new Configuration(),
       new SimpleFunctionRegistry,
@@ -60,5 +55,4 @@ class HiveSessionCatalogSuite extends SessionCatalogSuite {
     original.createTempView("copytest2", tempTable2, overrideIfExists = false)
     assert(clone.getTempView("copytest2").isEmpty)
   }
-
 }

From 300d3a05e43dee853b452973eea8a707d486dd61 Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Mon, 27 Feb 2017 16:46:47 -0800
Subject: [PATCH 24/31] Most of the changes from review.

---
 .../sql/catalyst/catalog/SessionCatalog.scala |  6 +--
 .../catalog/SessionCatalogSuite.scala         | 33 ++++++++++++-
 .../spark/sql/ExperimentalMethods.scala       |  6 ++-
 .../spark/sql/internal/SessionState.scala     | 46 +++++++++----------
 .../spark/sql/internal/SharedState.scala      |  6 ---
 .../apache/spark/sql/SessionStateSuite.scala  | 11 -----
 .../spark/sql/internal/CatalogSuite.scala     |  3 --
 .../spark/sql/test/TestSQLContext.scala       | 15 ++----
 .../spark/sql/hive/HiveSessionCatalog.scala   | 12 ++---
 .../spark/sql/hive/HiveSessionState.scala     | 30 ++++++------
 .../apache/spark/sql/hive/test/TestHive.scala |  8 ++--
 .../sql/hive/HiveSessionCatalogSuite.scala    |  4 +-
 .../sql/hive/HiveSessionStateSuite.scala      | 14 +++++-
 13 files changed, 103 insertions(+), 91 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 0c7f5567f2717..059cd117b5c03 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -1183,9 +1183,9 @@ class SessionCatalog(
 
   /**
    * Get an identical copy of the `SessionCatalog`.
-   * The temporary tables and function registry are retained.
+   * The temporary views and function registry are retained.
    * The table relation cache will not be populated.
-   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need deep
+   * @note `externalCatalog` and `globalTempViewManager` are from shared state, do not need deep
    * copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
    * All arguments passed in should be associated with a particular `SparkSession`.
    */
@@ -1194,7 +1194,6 @@ class SessionCatalog(
       hadoopConf: Configuration,
       functionRegistry: FunctionRegistry,
       parser: ParserInterface): SessionCatalog = {
-
     val catalog = new SessionCatalog(
       externalCatalog,
       globalTempViewManager,
@@ -1212,5 +1211,4 @@ class SessionCatalog(
 
     catalog
   }
-
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index f7a9db2c53eaf..7bdf3a9f05a0e 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1199,7 +1199,7 @@ class SessionCatalogSuite extends PlanTest {
     }
   }
 
-  test("copy SessionCatalog") {
+  test("clone SessionCatalog - temp views") {
     val externalCatalog = newEmptyCatalog()
     val original = new SessionCatalog(externalCatalog)
     val tempTable1 = Range(1, 10, 1, 10)
@@ -1222,4 +1222,35 @@ class SessionCatalogSuite extends PlanTest {
     original.createTempView("copytest2", tempTable2, overrideIfExists = false)
     assert(clone.getTempView("copytest2").isEmpty)
   }
+
+  test("clone SessionCatalog - current db") {
+    val externalCatalog = newEmptyCatalog()
+    val original = new SessionCatalog(externalCatalog)
+    val tempTable1 = Range(1, 10, 1, 10)
+    val db1 = "copytest1"
+    original.createTempView(db1, tempTable1, overrideIfExists = false)
+    original.setCurrentDatabase(db1)
+
+    // check if current db copied over
+    val clone = original.clone(
+      SimpleCatalystConf(caseSensitiveAnalysis = true),
+      new Configuration(),
+      new SimpleFunctionRegistry,
+      CatalystSqlParser)
+    assert(original ne clone)
+    assert(clone.getCurrentDatabase == db1)
+
+    // check if clone and original independent
+    val db2 = "copytest2"
+    val tempTable2 = Range(1, 20, 2, 20)
+    clone.createTempView(db2, tempTable2, overrideIfExists = false)
+    clone.setCurrentDatabase(db2)
+    assert(original.getCurrentDatabase == db1)
+
+    val db3 = "copytest3"
+    val tempTable3 = Range(1, 30, 2, 30)
+    original.createTempView(db3, tempTable3, overrideIfExists = false)
+    original.setCurrentDatabase(db3)
+    assert(clone.getCurrentDatabase == db2)
+  }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index bd8dd6ea3fe0f..7890394371025 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -48,8 +48,10 @@ class ExperimentalMethods private[sql]() {
 
   override def clone(): ExperimentalMethods = {
     val result = new ExperimentalMethods
-    result.extraStrategies = extraStrategies
-    result.extraOptimizations = extraOptimizations
+    synchronized {
+      result.extraStrategies = extraStrategies
+      result.extraOptimizations = extraOptimizations
+    }
     result
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 6c397e29f97c8..a1698bbba059b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.streaming.StreamingQueryManager
@@ -34,6 +35,13 @@ import org.apache.spark.sql.util.ExecutionListenerManager
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
+ * @param functionRegistry Internal catalog for managing functions registered by the user.
+ * @param catalog Internal catalog for managing table and database states.
+ * @param sqlParser Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
+ * @param analyzer Logical query plan analyzer for resolving unresolved attributes and relations.
+ * @param streamingQueryManager Interface to start and stop
+ *                              [[org.apache.spark.sql.streaming.StreamingQuery]]s.
+ * @param queryExecutionCreator Lambda to create a [[QueryExecution]] from a [[LogicalPlan]]
  */
 private[sql] class SessionState(
     sparkContext: SparkContext,
@@ -53,7 +61,9 @@ private[sql] class SessionState(
    */
   val udf: UDFRegistration = new UDFRegistration(functionRegistry)
 
-  // Logical query plan optimizer.
+  /**
+   *   Logical query plan optimizer.
+   */
   val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
   /**
@@ -129,32 +139,23 @@ private[sql] class SessionState(
 object SessionState {
 
   def apply(sparkSession: SparkSession): SessionState = {
-    apply(sparkSession, None)
+    apply(sparkSession, new SQLConf)
   }
 
-  def apply(
-      sparkSession: SparkSession,
-      conf: Option[SQLConf]): SessionState = {
-
+  def apply(sparkSession: SparkSession, sqlConf: SQLConf): SessionState = {
     val sparkContext = sparkSession.sparkContext
 
-    // SQL-specific key-value configurations.
-    val sqlConf = conf.getOrElse(new SQLConf)
-
     // Automatically extract all entries and put them in our SQLConf
     mergeSparkConf(sqlConf, sparkContext.getConf)
 
-    // Internal catalog for managing functions registered by the user.
     val functionRegistry = FunctionRegistry.builtin.clone()
 
     // A class for loading resources specified by a function.
     val functionResourceLoader: FunctionResourceLoader =
       createFunctionResourceLoader(sparkContext, sparkSession.sharedState)
 
-    // Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
 
-    // Internal catalog for managing table and database states.
     val catalog = new SessionCatalog(
       sparkSession.sharedState.externalCatalog,
       sparkSession.sharedState.globalTempViewManager,
@@ -164,10 +165,8 @@ object SessionState {
       newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
       sqlParser)
 
-    // Logical query plan analyzer for resolving unresolved attributes and relations.
     val analyzer: Analyzer = createAnalyzer(sparkSession, catalog, sqlConf)
 
-    // Interface to start and stop [[StreamingQuery]]s.
     val streamingQueryManager: StreamingQueryManager = new StreamingQueryManager(sparkSession)
 
     val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
@@ -188,7 +187,6 @@ object SessionState {
   def createFunctionResourceLoader(
       sparkContext: SparkContext,
       sharedState: SharedState): FunctionResourceLoader = {
-
     new FunctionResourceLoader {
       override def loadResource(resource: FunctionResource): Unit = {
         resource.resourceType match {
@@ -203,23 +201,25 @@ object SessionState {
     }
   }
 
-  def newHadoopConf(copyHadoopConf: Configuration, sqlConf: SQLConf): Configuration = {
-    val hadoopConf = new Configuration(copyHadoopConf)
-    sqlConf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
-    hadoopConf
+  def newHadoopConf(hadoopConf: Configuration, sqlConf: SQLConf): Configuration = {
+    val newHadoopConf = new Configuration(hadoopConf)
+    sqlConf.getAllConfs.foreach { case (k, v) => if (v ne null) newHadoopConf.set(k, v) }
+    newHadoopConf
   }
 
-  def createAnalyzer(
+  /**
+   * Create an logical query plan `Analyzer` with rules specific to a non-Hive `SessionState`.
+   */
+  private def createAnalyzer(
       sparkSession: SparkSession,
       catalog: SessionCatalog,
       sqlConf: SQLConf): Analyzer = {
-
     new Analyzer(catalog, sqlConf) {
-      override val extendedResolutionRules =
+      override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
         new FindDataSourceTable(sparkSession) ::
         new ResolveSQLOnFile(sparkSession) :: Nil
 
-      override val postHocResolutionRules =
+      override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
         PreprocessTableCreation(sparkSession) ::
         PreprocessTableInsertion(sqlConf) ::
         DataSourceAnalysis(sqlConf) :: Nil
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index b7a1923215d4c..57dce0574953f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -140,14 +140,8 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
     SparkSession.sqlListener.get()
   }
 
-  /*
-   * This belongs here more than in `SessionState`. However, does not seem that it can be
-   * removed from `SessionState` and `HiveSessionState` without using reflection in
-   * `AddJarCommand`.
-   */
   def addJar(path: String): Unit = {
     sparkContext.addJar(path)
-
     val uri = new Path(path).toUri
     val jarURL = if (uri.getScheme == null) {
       // `path` is a local file path without a URL scheme
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index aec99be391908..85027a35e34ae 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -35,10 +35,6 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     createSession()
   }
 
-  override def afterEach(): Unit = {
-    activeSession.stop()
-  }
-
   test("fork new session and inherit RuntimeConfig options") {
     val key = "spark-config-clone"
     activeSession.conf.set(key, "active")
@@ -81,9 +77,7 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
       def apply(p: LogicalPlan): LogicalPlan = p
     }
     val optimizations = List(DummyRule1, DummyRule2)
-
     activeSession.experimental.extraOptimizations = optimizations
-
     val forkedSession = activeSession.cloneSession()
 
     // inheritance
@@ -110,8 +104,6 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
         Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
     }
 
-    SparkSession.setActiveSession(activeSession)
-
     implicit val enc = Encoders.tuple(Encoders.scalaInt, Encoders.STRING)
     activeSession
       .createDataset[(Int, String)](Seq(1, 2, 3).map(i => (i, i.toString)))
@@ -122,11 +114,8 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     val forkedSession = activeSession.cloneSession()
     assert(forkedSession ne activeSession)
     assert(forkedSession.sessionState ne activeSession.sessionState)
-    SparkSession.setActiveSession(forkedSession)
     checkTableExists(forkedSession)
     checkTableExists(activeSession.cloneSession()) // ability to clone multiple times
     checkTableExists(forkedSession.cloneSession()) // clone of clone
-
-    SparkSession.clearActiveSession()
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
index d14e0634d17eb..91bd2dae4fe5d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/CatalogSuite.scala
@@ -514,7 +514,4 @@ class CatalogSuite
       .createTempView("fork_table", Range(1, 2, 3, 4), overrideIfExists = true)
     assert(spark.catalog.listTables().collect().map(_.name).toSet == Set())
   }
-
-  // TODO: add tests for the rest of them
-
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
index f73b7c58b7820..898a2fb4f329b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala
@@ -18,14 +18,8 @@
 package org.apache.spark.sql.test
 
 import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.sql.{ExperimentalMethods, SparkSession}
-import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
-import org.apache.spark.sql.catalyst.catalog.SessionCatalog
-import org.apache.spark.sql.catalyst.parser.ParserInterface
-import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.execution.QueryExecution
-import org.apache.spark.sql.internal.{SessionState, SharedState, SQLConf}
-import org.apache.spark.sql.streaming.StreamingQueryManager
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.internal.{SessionState, SQLConf}
 
 /**
  * A special [[SparkSession]] prepared for testing.
@@ -41,7 +35,8 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
   }
 
   @transient
-  override lazy val sessionState: SessionState = SessionState(this, Some(
+  override lazy val sessionState: SessionState = SessionState(
+    this,
     new SQLConf {
       clear()
       override def clear(): Unit = {
@@ -49,7 +44,7 @@ private[sql] class TestSparkSession(sc: SparkContext) extends SparkSession(sc) {
         // Make sure we start with the default test configs even after clear
         TestSQLContext.overrideConfs.foreach { case (key, value) => setConfString(key, value) }
       }
-    }))
+    })
 
   // Needed for Java tests
   def loadTestData(): Unit = {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index fa08643384080..71ab286783608 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -87,7 +87,6 @@ private[sql] class HiveSessionCatalog(
       hadoopConf: Configuration,
       functionRegistry: FunctionRegistry,
       parser: ParserInterface): HiveSessionCatalog = {
-
     val catalog = HiveSessionCatalog(
       newSparkSession,
       functionResourceLoader,
@@ -257,7 +256,6 @@ private[sql] object HiveSessionCatalog {
       conf: SQLConf,
       hadoopConf: Configuration,
       parser: ParserInterface): HiveSessionCatalog = {
-
     // Catalog for handling data source tables. TODO: This really doesn't belong here since it is
     // essentially a cache for metastore tables. However, it relies on a lot of session-specific
     // things so it would be a lot of work to split its functionality between HiveSessionCatalog
@@ -268,10 +266,10 @@ private[sql] object HiveSessionCatalog {
       sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
       sparkSession.sharedState.globalTempViewManager,
       metastoreCatalog,
-      functionResourceLoader: FunctionResourceLoader,
-      functionRegistry: FunctionRegistry,
-      conf: SQLConf,
-      hadoopConf: Configuration,
-      parser: ParserInterface)
+      functionResourceLoader,
+      functionRegistry,
+      conf,
+      hadoopConf,
+      parser)
   }
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 39d24a99e4595..52b5947590fc0 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -22,6 +22,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+import org.apache.spark.sql.catalyst.rules.Rule
 import org.apache.spark.sql.execution.{QueryExecution, SparkPlanner, SparkSqlParser}
 import org.apache.spark.sql.execution.datasources._
 import org.apache.spark.sql.hive.client.HiveClient
@@ -31,6 +32,10 @@ import org.apache.spark.sql.streaming.StreamingQueryManager
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]] backed by Hive.
+ * @param catalog A Hive client used for interacting with the metastore.
+ * @param analyzer An analyzer that uses the Hive metastore.
+ * @param plannerCreator Lambda to create a [[SparkPlanner]] that converts optimized logical
+ *                       plans to physical plans.
  */
 private[hive] class HiveSessionState(
     sparkContext: SparkContext,
@@ -41,7 +46,7 @@ private[hive] class HiveSessionState(
     override val catalog: HiveSessionCatalog,
     sqlParser: ParserInterface,
     val metadataHive: HiveClient,
-    override val analyzer: Analyzer,
+    analyzer: Analyzer,
     streamingQueryManager: StreamingQueryManager,
     queryExecutionCreator: LogicalPlan => QueryExecution,
     val plannerCreator: () => SparkPlanner)
@@ -155,13 +160,10 @@ private[hive] class HiveSessionState(
 object HiveSessionState {
 
   def apply(sparkSession: SparkSession): HiveSessionState = {
-    apply(sparkSession, None)
+    apply(sparkSession, new SQLConf)
   }
 
-  def apply(
-      sparkSession: SparkSession,
-      conf: Option[SQLConf]): HiveSessionState = {
-
+  def apply(sparkSession: SparkSession, conf: SQLConf): HiveSessionState = {
     val initHelper = SessionState(sparkSession, conf)
 
     val sparkContext = sparkSession.sparkContext
@@ -174,12 +176,10 @@ object HiveSessionState {
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, initHelper.conf),
       initHelper.sqlParser)
 
-    // A Hive client used for interacting with the metastore.
     val metadataHive: HiveClient =
       sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog].client
         .newSession()
 
-    // An analyzer that uses the Hive metastore.
     val analyzer: Analyzer = createAnalyzer(sparkSession, catalog, initHelper.conf)
 
     val plannerCreator = createPlannerCreator(
@@ -202,19 +202,21 @@ object HiveSessionState {
       plannerCreator)
   }
 
-  def createAnalyzer(
+  /**
+   * Create an logical query plan `Analyzer` with rules specific to a `HiveSessionState`.
+   */
+  private def createAnalyzer(
       sparkSession: SparkSession,
       catalog: HiveSessionCatalog,
       sqlConf: SQLConf): Analyzer = {
-
     new Analyzer(catalog, sqlConf) {
-      override val extendedResolutionRules =
+      override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
         new ResolveHiveSerdeTable(sparkSession) ::
         new FindDataSourceTable(sparkSession) ::
         new FindHiveSerdeTable(sparkSession) ::
         new ResolveSQLOnFile(sparkSession) :: Nil
 
-      override val postHocResolutionRules =
+      override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
         catalog.ParquetConversions ::
         catalog.OrcConversions ::
         PreprocessTableCreation(sparkSession) ::
@@ -226,11 +228,10 @@ object HiveSessionState {
     }
   }
 
-  def createPlannerCreator(
+  private def createPlannerCreator(
       associatedSparkSession: SparkSession,
       sqlConf: SQLConf,
       experimentalMethods: ExperimentalMethods): () => SparkPlanner = {
-
     () =>
       new SparkPlanner(
           associatedSparkSession.sparkContext,
@@ -255,5 +256,4 @@ object HiveSessionState {
         }
       }
   }
-
 }
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index d4c9dc0a611bf..15929747d1147 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -145,7 +145,8 @@ private[hive] class TestHiveSparkSession(
     existingSharedState.getOrElse(new SharedState(sc))
   }
 
-  private def createHiveSessionState: HiveSessionState = {
+  @transient
+  override lazy val sessionState: HiveSessionState = {
     val testConf =
       new SQLConf {
         clear()
@@ -156,7 +157,7 @@ private[hive] class TestHiveSparkSession(
         }
       }
     val queryExecutionCreator = (plan: LogicalPlan) => new TestHiveQueryExecution(this, plan)
-    val initHelper = HiveSessionState(this, Some(testConf))
+    val initHelper = HiveSessionState(this, testConf)
     SessionState.mergeSparkConf(testConf, sparkContext.getConf)
 
     new HiveSessionState(
@@ -174,9 +175,6 @@ private[hive] class TestHiveSparkSession(
       initHelper.plannerCreator)
   }
 
-  @transient
-  override lazy val sessionState: HiveSessionState = createHiveSessionState
-
   override def newSession(): TestHiveSparkSession = {
     new TestHiveSparkSession(sc, Some(sharedState), loadTestTables)
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index 7ac0774389131..7ebdac2a2fe53 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -19,15 +19,15 @@ package org.apache.spark.sql.hive
 
 import org.apache.hadoop.conf.Configuration
 
+import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
-import org.apache.spark.sql.catalyst.catalog._
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.Range
 import org.apache.spark.sql.internal.SQLConf
 
-class HiveSessionCatalogSuite extends SessionCatalogSuite {
+class HiveSessionCatalogSuite extends SparkFunSuite {
 
   test("clone HiveSessionCatalog") {
     val hiveSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index f5f3ce5aada06..536c275af6b3a 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -22,6 +22,9 @@ import org.scalatest.BeforeAndAfterEach
 import org.apache.spark.sql._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 
+/**
+ * Run all tests from `SessionStateSuite` with a `HiveSessionState`.
+ */
 class HiveSessionStateSuite extends SessionStateSuite
   with TestHiveSingleton with BeforeAndAfterEach {
 
@@ -31,8 +34,15 @@ class HiveSessionStateSuite extends SessionStateSuite
 
   override def afterEach(): Unit = {}
 
-  override def createSession(): Unit = {
-    activeSession = spark.newSession()
+  override def afterAll(): Unit = {
+    try {
+      hiveContext.reset()
+    } finally {
+      super.afterAll()
+    }
   }
 
+  override def createSession(): Unit = {
+    activeSession = spark.newSession() // TestHiveSparkSession from TestHiveSingleton
+  }
 }

From 3ee271f14dde66ee1aefeb4f463c36b514e4f28a Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Sun, 5 Mar 2017 21:28:14 -0800
Subject: [PATCH 25/31] All but one review feedback.

---
 .../org/apache/spark/sql/SparkSession.scala    |  6 ++++++
 .../apache/spark/sql/SessionStateSuite.scala   | 18 ++++++++++++++++--
 .../spark/sql/hive/HiveSessionStateSuite.scala |  2 ++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 8b4fa669158e4..1f23e6fa0f317 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -67,6 +67,12 @@ import org.apache.spark.util.Utils
  *     .config("spark.some.config.option", "some-value")
  *     .getOrCreate()
  * }}}
+ *
+ * @param sparkContext The Spark context associated with this Spark session.
+ * @param existingSharedState If supplied, use the existing shared state
+ *                            instead of creating a new one.
+ * @param parentSessionState If supplied, inherit all session state (i.e. temporary 
+ *                            views, SQL config, UDFs etc) from parent.
  */
 @InterfaceStability.Stable
 class SparkSession private(
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index 85027a35e34ae..706de2a16a1d2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -17,18 +17,27 @@
 
 package org.apache.spark.sql
 
+import org.scalatest.BeforeAndAfterAll
 import org.scalatest.BeforeAndAfterEach
 
+import org.apache.spark.SparkContext
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
 
-class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
+class SessionStateSuite extends SparkFunSuite
+    with BeforeAndAfterEach with BeforeAndAfterAll {
 
   protected var activeSession: SparkSession = _
+  protected var sparkContext: SparkContext = null
+
+  override def beforeAll(): Unit = {
+    sparkContext = SparkSession.builder().master("local").getOrCreate().sparkContext
+  }
 
   protected def createSession(): Unit = {
-    activeSession = SparkSession.builder().master("local").getOrCreate()
+    activeSession =
+      SparkSession.builder().master("local").sparkContext(sparkContext).getOrCreate()
   }
 
   override def beforeEach(): Unit = {
@@ -118,4 +127,9 @@ class SessionStateSuite extends SparkFunSuite with BeforeAndAfterEach {
     checkTableExists(activeSession.cloneSession()) // ability to clone multiple times
     checkTableExists(forkedSession.cloneSession()) // clone of clone
   }
+
+  test("fork new session and inherit reference to SharedState") {
+    val forkedSession = activeSession.cloneSession()
+    assert(activeSession.sharedState eq forkedSession.sharedState)
+  }
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index 536c275af6b3a..3dd473fa3d187 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -34,6 +34,8 @@ class HiveSessionStateSuite extends SessionStateSuite
 
   override def afterEach(): Unit = {}
 
+  override def beforeAll(): Unit = {}
+
   override def afterAll(): Unit = {
     try {
       hiveContext.reset()

From 2740c63634b41de54a8913ea71e3c8927a9969ca Mon Sep 17 00:00:00 2001
From: Shixiong Zhu <shixiong@databricks.com>
Date: Mon, 6 Mar 2017 13:55:33 -0800
Subject: [PATCH 26/31] Fix tests

---
 .../scala/org/apache/spark/sql/SessionStateSuite.scala |  6 ++++++
 .../apache/spark/sql/hive/HiveSessionStateSuite.scala  | 10 +---------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index 706de2a16a1d2..bd884819c6e02 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -44,6 +44,12 @@ class SessionStateSuite extends SparkFunSuite
     createSession()
   }
 
+  override def afterAll(): Unit = {
+    if (sparkContext != null) {
+      sparkContext.stop()
+    }
+  }
+
   test("fork new session and inherit RuntimeConfig options") {
     val key = "spark-config-clone"
     activeSession.conf.set(key, "active")
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index 3dd473fa3d187..fa88dc1304610 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -28,20 +28,12 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 class HiveSessionStateSuite extends SessionStateSuite
   with TestHiveSingleton with BeforeAndAfterEach {
 
-  override def beforeEach(): Unit = {
-    createSession()
-  }
-
   override def afterEach(): Unit = {}
 
   override def beforeAll(): Unit = {}
 
   override def afterAll(): Unit = {
-    try {
-      hiveContext.reset()
-    } finally {
-      super.afterAll()
-    }
+    hiveContext.reset()
   }
 
   override def createSession(): Unit = {

From 0f167db45f8393065ebeb43ff6d7014d642c0267 Mon Sep 17 00:00:00 2001
From: Shixiong Zhu <shixiong@databricks.com>
Date: Mon, 6 Mar 2017 14:37:44 -0800
Subject: [PATCH 27/31] Clean up tests

---
 .../apache/spark/sql/SessionStateSuite.scala  | 158 ++++++++++--------
 .../sql/hive/HiveSessionStateSuite.scala      |  15 +-
 2 files changed, 96 insertions(+), 77 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index bd884819c6e02..d7966690ab55d 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -20,7 +20,6 @@ package org.apache.spark.sql
 import org.scalatest.BeforeAndAfterAll
 import org.scalatest.BeforeAndAfterEach
 
-import org.apache.spark.SparkContext
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
@@ -28,84 +27,98 @@ import org.apache.spark.sql.catalyst.rules.Rule
 class SessionStateSuite extends SparkFunSuite
     with BeforeAndAfterEach with BeforeAndAfterAll {
 
+  /**
+   * A shared SparkSession for all tests in this suite. Make sure you reset any changes to this
+   * session as this is a singleton HiveSparkSession in HiveSessionStateSuite and it's shared
+   * with all Hive test suites.
+   */
   protected var activeSession: SparkSession = _
-  protected var sparkContext: SparkContext = null
 
   override def beforeAll(): Unit = {
-    sparkContext = SparkSession.builder().master("local").getOrCreate().sparkContext
-  }
-
-  protected def createSession(): Unit = {
-    activeSession =
-      SparkSession.builder().master("local").sparkContext(sparkContext).getOrCreate()
-  }
-
-  override def beforeEach(): Unit = {
-    createSession()
+    activeSession = SparkSession.builder().master("local").getOrCreate()
   }
 
   override def afterAll(): Unit = {
-    if (sparkContext != null) {
-      sparkContext.stop()
+    if (activeSession != null) {
+      activeSession.stop()
+      activeSession = null
     }
+    super.afterAll()
   }
 
   test("fork new session and inherit RuntimeConfig options") {
     val key = "spark-config-clone"
     activeSession.conf.set(key, "active")
-
-    // inheritance
-    val forkedSession = activeSession.cloneSession()
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.conf ne activeSession.conf)
-    assert(forkedSession.conf.get(key) == "active")
-
-    // independence
-    forkedSession.conf.set(key, "forked")
-    assert(activeSession.conf.get(key) == "active")
-    activeSession.conf.set(key, "dontcopyme")
-    assert(forkedSession.conf.get(key) == "forked")
+    try {
+      // inheritance
+      val forkedSession = activeSession.cloneSession()
+      assert(forkedSession ne activeSession)
+      assert(forkedSession.conf ne activeSession.conf)
+      assert(forkedSession.conf.get(key) == "active")
+
+      // independence
+      forkedSession.conf.set(key, "forked")
+      assert(activeSession.conf.get(key) == "active")
+      activeSession.conf.set(key, "dontcopyme")
+      assert(forkedSession.conf.get(key) == "forked")
+    } finally {
+      activeSession.conf.unset(key)
+    }
   }
 
   test("fork new session and inherit function registry and udf") {
-    activeSession.udf.register("strlenScala", (_: String).length + (_: Int))
-    val forkedSession = activeSession.cloneSession()
-
-    // inheritance
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState.functionRegistry ne
-      activeSession.sessionState.functionRegistry)
-    assert(forkedSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
-
-    // independence
-    forkedSession.sessionState.functionRegistry.dropFunction("strlenScala")
-    assert(activeSession.sessionState.functionRegistry.lookupFunction("strlenScala").nonEmpty)
-    activeSession.udf.register("addone", (_: Int) + 1)
-    assert(forkedSession.sessionState.functionRegistry.lookupFunction("addone").isEmpty)
+    val testFuncName1 = "strlenScala"
+    val testFuncName2 = "addone"
+    try {
+      activeSession.udf.register(testFuncName1, (_: String).length + (_: Int))
+      val forkedSession = activeSession.cloneSession()
+
+      // inheritance
+      assert(forkedSession ne activeSession)
+      assert(forkedSession.sessionState.functionRegistry ne
+        activeSession.sessionState.functionRegistry)
+      assert(forkedSession.sessionState.functionRegistry.lookupFunction(testFuncName1).nonEmpty)
+
+      // independence
+      forkedSession.sessionState.functionRegistry.dropFunction(testFuncName1)
+      assert(activeSession.sessionState.functionRegistry.lookupFunction(testFuncName1).nonEmpty)
+      activeSession.udf.register(testFuncName2, (_: Int) + 1)
+      assert(forkedSession.sessionState.functionRegistry.lookupFunction(testFuncName2).isEmpty)
+    } finally {
+      activeSession.sessionState.functionRegistry.dropFunction(testFuncName1)
+      activeSession.sessionState.functionRegistry.dropFunction(testFuncName2)
+    }
   }
 
   test("fork new session and inherit experimental methods") {
-    object DummyRule1 extends Rule[LogicalPlan] {
-      def apply(p: LogicalPlan): LogicalPlan = p
-    }
-    object DummyRule2 extends Rule[LogicalPlan] {
-      def apply(p: LogicalPlan): LogicalPlan = p
+    val originalExtraOptimizations = activeSession.experimental.extraOptimizations
+    val originalExtraStrategies = activeSession.experimental.extraStrategies
+    try {
+      object DummyRule1 extends Rule[LogicalPlan] {
+        def apply(p: LogicalPlan): LogicalPlan = p
+      }
+      object DummyRule2 extends Rule[LogicalPlan] {
+        def apply(p: LogicalPlan): LogicalPlan = p
+      }
+      val optimizations = List(DummyRule1, DummyRule2)
+      activeSession.experimental.extraOptimizations = optimizations
+      val forkedSession = activeSession.cloneSession()
+
+      // inheritance
+      assert(forkedSession ne activeSession)
+      assert(forkedSession.experimental ne activeSession.experimental)
+      assert(forkedSession.experimental.extraOptimizations.toSet ==
+        activeSession.experimental.extraOptimizations.toSet)
+
+      // independence
+      forkedSession.experimental.extraOptimizations = List(DummyRule2)
+      assert(activeSession.experimental.extraOptimizations == optimizations)
+      activeSession.experimental.extraOptimizations = List(DummyRule1)
+      assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
+    } finally {
+      activeSession.experimental.extraOptimizations = originalExtraOptimizations
+      activeSession.experimental.extraStrategies = originalExtraStrategies
     }
-    val optimizations = List(DummyRule1, DummyRule2)
-    activeSession.experimental.extraOptimizations = optimizations
-    val forkedSession = activeSession.cloneSession()
-
-    // inheritance
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.experimental ne activeSession.experimental)
-    assert(forkedSession.experimental.extraOptimizations.toSet ==
-      activeSession.experimental.extraOptimizations.toSet)
-
-    // independence
-    forkedSession.experimental.extraOptimizations = List(DummyRule2)
-    assert(activeSession.experimental.extraOptimizations == optimizations)
-    activeSession.experimental.extraOptimizations = List(DummyRule1)
-    assert(forkedSession.experimental.extraOptimizations == List(DummyRule2))
   }
 
   test("fork new sessions and run query on inherited table") {
@@ -119,19 +132,26 @@ class SessionStateSuite extends SparkFunSuite
         Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil)
     }
 
-    implicit val enc = Encoders.tuple(Encoders.scalaInt, Encoders.STRING)
+    val spark = activeSession
+    // Cannot use `import activeSession.implicits._` due to the compiler limitation.
+    import spark.implicits._
+
     activeSession
       .createDataset[(Int, String)](Seq(1, 2, 3).map(i => (i, i.toString)))
       .toDF("int", "str")
       .createOrReplaceTempView("df")
-    checkTableExists(activeSession)
-
-    val forkedSession = activeSession.cloneSession()
-    assert(forkedSession ne activeSession)
-    assert(forkedSession.sessionState ne activeSession.sessionState)
-    checkTableExists(forkedSession)
-    checkTableExists(activeSession.cloneSession()) // ability to clone multiple times
-    checkTableExists(forkedSession.cloneSession()) // clone of clone
+    try {
+      checkTableExists(activeSession)
+
+      val forkedSession = activeSession.cloneSession()
+      assert(forkedSession ne activeSession)
+      assert(forkedSession.sessionState ne activeSession.sessionState)
+      checkTableExists(forkedSession)
+      checkTableExists(activeSession.cloneSession()) // ability to clone multiple times
+      checkTableExists(forkedSession.cloneSession()) // clone of clone
+    } finally {
+      activeSession.sql("drop table df")
+    }
   }
 
   test("fork new session and inherit reference to SharedState") {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
index fa88dc1304610..67c77fb62f4e1 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionStateSuite.scala
@@ -28,15 +28,14 @@ import org.apache.spark.sql.hive.test.TestHiveSingleton
 class HiveSessionStateSuite extends SessionStateSuite
   with TestHiveSingleton with BeforeAndAfterEach {
 
-  override def afterEach(): Unit = {}
-
-  override def beforeAll(): Unit = {}
-
-  override def afterAll(): Unit = {
-    hiveContext.reset()
+  override def beforeAll(): Unit = {
+    // Reuse the singleton session
+    activeSession = spark
   }
 
-  override def createSession(): Unit = {
-    activeSession = spark.newSession() // TestHiveSparkSession from TestHiveSingleton
+  override def afterAll(): Unit = {
+    // Set activeSession to null to avoid stopping the singleton session
+    activeSession = null
+    super.afterAll()
   }
 }

From 2f0b1adf98807f0ca9f48ab46b1f5f7521c365c2 Mon Sep 17 00:00:00 2001
From: Shixiong Zhu <shixiong@databricks.com>
Date: Mon, 6 Mar 2017 16:45:51 -0800
Subject: [PATCH 28/31] fix SessionCatalogSuite

---
 .../spark/sql/catalyst/catalog/SessionCatalogSuite.scala      | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index b73ad3bec6071..4f8354aa19e86 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1225,6 +1225,10 @@ class SessionCatalogSuite extends PlanTest {
 
   test("clone SessionCatalog - current db") {
     val externalCatalog = newEmptyCatalog()
+    externalCatalog.createDatabase(newDb("copytest1"), true)
+    externalCatalog.createDatabase(newDb("copytest2"), true)
+    externalCatalog.createDatabase(newDb("copytest3"), true)
+
     val original = new SessionCatalog(externalCatalog)
     val tempTable1 = Range(1, 10, 1, 10)
     val db1 = "copytest1"

From c41e7bc1fbebf46655b7414b6c63b93853d1b843 Mon Sep 17 00:00:00 2001
From: Shixiong Zhu <shixiong@databricks.com>
Date: Mon, 6 Mar 2017 22:56:35 -0800
Subject: [PATCH 29/31] More cleanup

---
 .../sql/catalyst/catalog/SessionCatalog.scala |  19 ++-
 .../catalog/SessionCatalogSuite.scala         |   4 +-
 .../spark/sql/ExperimentalMethods.scala       |   6 +-
 .../org/apache/spark/sql/SparkSession.scala   |  24 ++--
 .../spark/sql/internal/SessionState.scala     | 117 +++++++++++-------
 .../spark/sql/internal/SharedState.scala      |  16 ---
 .../spark/sql/hive/HiveSessionCatalog.scala   |  35 +++---
 .../spark/sql/hive/HiveSessionState.scala     |  24 ++--
 .../sql/hive/HiveSessionCatalogSuite.scala    |   2 +-
 9 files changed, 128 insertions(+), 119 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 113fc617607fa..df45c070f9129 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -50,7 +50,6 @@ object SessionCatalog {
 class SessionCatalog(
     externalCatalog: ExternalCatalog,
     globalTempViewManager: GlobalTempViewManager,
-    functionResourceLoader: FunctionResourceLoader,
     functionRegistry: FunctionRegistry,
     conf: CatalystConf,
     hadoopConf: Configuration,
@@ -66,11 +65,11 @@ class SessionCatalog(
     this(
       externalCatalog,
       new GlobalTempViewManager("global_temp"),
-      DummyFunctionResourceLoader,
       functionRegistry,
       conf,
       new Configuration(),
       CatalystSqlParser)
+    functionResourceLoader = DummyFunctionResourceLoader
   }
 
   // For testing only.
@@ -92,6 +91,8 @@ class SessionCatalog(
   @GuardedBy("this")
   protected var currentDb = formatDatabaseName(DEFAULT_DATABASE)
 
+  @volatile var functionResourceLoader: FunctionResourceLoader = _
+
   /**
    * Checks if the given name conforms the Hive standard ("[a-zA-z_0-9]+"),
    * i.e. if this name only contains characters, numbers, and _.
@@ -990,6 +991,9 @@ class SessionCatalog(
    * by a tuple (resource type, resource uri).
    */
   def loadFunctionResources(resources: Seq[FunctionResource]): Unit = {
+    if (functionResourceLoader == null) {
+      throw new IllegalStateException("functionResourceLoader has not yet been initialized")
+    }
     resources.foreach(functionResourceLoader.loadResource)
   }
 
@@ -1186,14 +1190,10 @@ class SessionCatalog(
   }
 
   /**
-   * Get an identical copy of the `SessionCatalog`.
-   * The temporary views and function registry are retained.
-   * The table relation cache will not be populated.
-   * @note `externalCatalog` and `globalTempViewManager` are from shared state, do not need deep
-   * copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
-   * All arguments passed in should be associated with a particular `SparkSession`.
+   * Create a new [[SessionCatalog]] with the provided parameters. `externalCatalog` and
+   * `globalTempViewManager` are `inherited`, while `currentDb` and `tempTables` are copied.
    */
-  def clone(
+  def newSessionCatalogWith(
       conf: CatalystConf,
       hadoopConf: Configuration,
       functionRegistry: FunctionRegistry,
@@ -1201,7 +1201,6 @@ class SessionCatalog(
     val catalog = new SessionCatalog(
       externalCatalog,
       globalTempViewManager,
-      functionResourceLoader,
       functionRegistry,
       conf,
       hadoopConf,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 4f8354aa19e86..84e95e64f18f8 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1206,7 +1206,7 @@ class SessionCatalogSuite extends PlanTest {
     original.createTempView("copytest1", tempTable1, overrideIfExists = false)
 
     // check if tables copied over
-    val clone = original.clone(
+    val clone = original.newSessionCatalogWith(
       SimpleCatalystConf(caseSensitiveAnalysis = true),
       new Configuration(),
       new SimpleFunctionRegistry,
@@ -1236,7 +1236,7 @@ class SessionCatalogSuite extends PlanTest {
     original.setCurrentDatabase(db1)
 
     // check if current db copied over
-    val clone = original.clone(
+    val clone = original.newSessionCatalogWith(
       SimpleCatalystConf(caseSensitiveAnalysis = true),
       new Configuration(),
       new SimpleFunctionRegistry,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
index 7890394371025..bd8dd6ea3fe0f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/ExperimentalMethods.scala
@@ -48,10 +48,8 @@ class ExperimentalMethods private[sql]() {
 
   override def clone(): ExperimentalMethods = {
     val result = new ExperimentalMethods
-    synchronized {
-      result.extraStrategies = extraStrategies
-      result.extraOptimizations = extraOptimizations
-    }
+    result.extraStrategies = extraStrategies
+    result.extraOptimizations = extraOptimizations
     result
   }
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 8726136425057..49562578b23cd 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -80,10 +80,6 @@ class SparkSession private(
     @transient private val parentSessionState: Option[SessionState])
   extends Serializable with Closeable with Logging { self =>
 
-  private[sql] def this(sc: SparkContext, existingSharedState: Option[SharedState]) {
-    this(sc, existingSharedState, None)
-  }
-
   private[sql] def this(sc: SparkContext) {
     this(sc, None, None)
   }
@@ -129,9 +125,11 @@ class SparkSession private(
   lazy val sessionState: SessionState = {
     parentSessionState
       .map(_.clone(this))
-      .getOrElse(SparkSession.instantiateSessionState(
-        SparkSession.sessionStateClassName(sparkContext.conf),
-        self))
+      .getOrElse {
+        SparkSession.instantiateSessionState(
+          SparkSession.sessionStateClassName(sparkContext.conf),
+          self)
+      }
   }
 
   /**
@@ -221,13 +219,12 @@ class SparkSession private(
    * @since 2.0.0
    */
   def newSession(): SparkSession = {
-    new SparkSession(sparkContext, Some(sharedState))
+    new SparkSession(sparkContext, Some(sharedState), parentSessionState = None)
   }
 
   /**
-   * :: Experimental ::
    * Create an identical copy of this `SparkSession`, sharing the underlying `SparkContext`
-   * and cached data. All the state of this session (i.e. SQL configurations, temporary tables,
+   * and shared state. All the state of this session (i.e. SQL configurations, temporary tables,
    * registered functions) is copied over, and the cloned session is set up with the same shared
    * state as this session. The cloned session is independent of this session, that is, any
    * non-global change in either session is not reflected in the other.
@@ -236,12 +233,8 @@ class SparkSession private(
    * This method will force the initialization of the shared state to ensure that parent
    * and child sessions are set up with the same shared state. If the underlying catalog
    * implementation is Hive, this will initialize the metastore, which may take some time.
-   *
-   * @since 2.2.0
    */
-  @Experimental
-  @InterfaceStability.Evolving
-  def cloneSession(): SparkSession = {
+  private[sql] def cloneSession(): SparkSession = {
     val result = new SparkSession(sparkContext, Some(sharedState), Some(sessionState))
     result.sessionState // force copy of SessionState
     result
@@ -919,6 +912,7 @@ object SparkSession {
           }
         })
       }
+
       return session
     }
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index a1698bbba059b..89974e68cf633 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -17,7 +17,10 @@
 
 package org.apache.spark.sql.internal
 
+import java.io.File
+
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql._
@@ -35,6 +38,10 @@ import org.apache.spark.sql.util.ExecutionListenerManager
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]].
+ * @param sparkContext The [[SparkContext]].
+ * @param sharedState The shared state.
+ * @param conf SQL-specific key-value configurations.
+ * @param experimentalMethods The experimental methods.
  * @param functionRegistry Internal catalog for managing functions registered by the user.
  * @param catalog Internal catalog for managing table and database states.
  * @param sqlParser Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
@@ -55,6 +62,38 @@ private[sql] class SessionState(
     val streamingQueryManager: StreamingQueryManager,
     val queryExecutionCreator: LogicalPlan => QueryExecution) {
 
+  def newHadoopConf(): Configuration = SessionState.newHadoopConf(
+    sparkContext.hadoopConfiguration,
+    conf)
+
+  def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
+    val hadoopConf = newHadoopConf()
+    options.foreach { case (k, v) =>
+      if ((v ne null) && k != "path" && k != "paths") {
+        hadoopConf.set(k, v)
+      }
+    }
+    hadoopConf
+  }
+
+  /**
+   * A class for loading resources specified by a function.
+   */
+  val functionResourceLoader: FunctionResourceLoader = {
+    new FunctionResourceLoader {
+      override def loadResource(resource: FunctionResource): Unit = {
+        resource.resourceType match {
+          case JarResource => addJar(resource.uri)
+          case FileResource => sparkContext.addFile(resource.uri)
+          case ArchiveResource =>
+            throw new AnalysisException(
+              "Archive is not allowed to be loaded. If YARN mode is used, " +
+                "please use --archives options while calling spark-submit.")
+        }
+      }
+    }
+  }
+
   /**
    * Interface exposed to the user for registering user-defined functions.
    * Note that the user-defined functions must be deterministic.
@@ -62,35 +101,21 @@ private[sql] class SessionState(
   val udf: UDFRegistration = new UDFRegistration(functionRegistry)
 
   /**
-   *   Logical query plan optimizer.
+   * Logical query plan optimizer.
    */
   val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
-  /**
-   * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
-   * that listen for execution metrics.
-   */
-  val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
-
   /**
    * Planner that converts optimized logical plans to physical plans.
    */
   def planner: SparkPlanner =
     new SparkPlanner(sparkContext, conf, experimentalMethods.extraStrategies)
 
-  def newHadoopConf(): Configuration = SessionState.newHadoopConf(
-    sparkContext.hadoopConfiguration,
-    conf)
-
-  def newHadoopConfWithOptions(options: Map[String, String]): Configuration = {
-    val hadoopConf = newHadoopConf()
-    options.foreach { case (k, v) =>
-      if ((v ne null) && k != "path" && k != "paths") {
-        hadoopConf.set(k, v)
-      }
-    }
-    hadoopConf
-  }
+  /**
+   * An interface to register custom [[org.apache.spark.sql.util.QueryExecutionListener]]s
+   * that listen for execution metrics.
+   */
+  val listenerManager: ExecutionListenerManager = new ExecutionListenerManager
 
   /**
    * Get an identical copy of the `SessionState` and associate it with the given `SparkSession`
@@ -100,7 +125,7 @@ private[sql] class SessionState(
     val confCopy = conf.clone()
     val functionRegistryCopy = functionRegistry.clone()
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
-    val catalogCopy = catalog.clone(
+    val catalogCopy = catalog.newSessionCatalogWith(
       confCopy,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
       functionRegistryCopy,
@@ -132,7 +157,26 @@ private[sql] class SessionState(
     catalog.refreshTable(sqlParser.parseTableIdentifier(tableName))
   }
 
-  def addJar(path: String): Unit = sharedState.addJar(path)
+  /**
+   * Add a jar path to [[SparkContext]] and the classloader.
+   *
+   * Note: this method seems not access any session state, but the subclass `HiveSessionState` needs
+   * to add the jar to its hive client for the current session. Hence, it still needs to be in
+   * [[SessionState]].
+   */
+  def addJar(path: String): Unit = {
+    sparkContext.addJar(path)
+    val uri = new Path(path).toUri
+    val jarURL = if (uri.getScheme == null) {
+      // `path` is a local file path without a URL scheme
+      new File(path).toURI.toURL
+    } else {
+      // `path` is a URL with a scheme
+      uri.toURL
+    }
+    sharedState.jarClassLoader.addURL(jarURL)
+    Thread.currentThread().setContextClassLoader(sharedState.jarClassLoader)
+  }
 }
 
 
@@ -150,16 +194,11 @@ object SessionState {
 
     val functionRegistry = FunctionRegistry.builtin.clone()
 
-    // A class for loading resources specified by a function.
-    val functionResourceLoader: FunctionResourceLoader =
-      createFunctionResourceLoader(sparkContext, sparkSession.sharedState)
-
     val sqlParser: ParserInterface = new SparkSqlParser(sqlConf)
 
     val catalog = new SessionCatalog(
       sparkSession.sharedState.externalCatalog,
       sparkSession.sharedState.globalTempViewManager,
-      functionResourceLoader,
       functionRegistry,
       sqlConf,
       newHadoopConf(sparkContext.hadoopConfiguration, sqlConf),
@@ -171,7 +210,7 @@ object SessionState {
 
     val queryExecutionCreator = (plan: LogicalPlan) => new QueryExecution(sparkSession, plan)
 
-    new SessionState(
+    val sessionState = new SessionState(
       sparkContext,
       sparkSession.sharedState,
       sqlConf,
@@ -182,23 +221,11 @@ object SessionState {
       analyzer,
       streamingQueryManager,
       queryExecutionCreator)
-  }
-
-  def createFunctionResourceLoader(
-      sparkContext: SparkContext,
-      sharedState: SharedState): FunctionResourceLoader = {
-    new FunctionResourceLoader {
-      override def loadResource(resource: FunctionResource): Unit = {
-        resource.resourceType match {
-          case JarResource => sharedState.addJar(resource.uri)
-          case FileResource => sparkContext.addFile(resource.uri)
-          case ArchiveResource =>
-            throw new AnalysisException(
-              "Archive is not allowed to be loaded. If YARN mode is used, " +
-                "please use --archives options while calling spark-submit.")
-        }
-      }
-    }
+    // functionResourceLoader needs to access SessionState.addJar, so it cannot be created before
+    // creating SessionState. Setting `catalog.functionResourceLoader` here is safe since the caller
+    // cannot use SessionCatalog before we return SessionState.
+    catalog.functionResourceLoader = sessionState.functionResourceLoader
+    sessionState
   }
 
   def newHadoopConf(hadoopConf: Configuration, sqlConf: SQLConf): Configuration = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index fd7b1fcf295f4..86129fa87feaa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.internal
 
-import java.io.File
-
 import scala.reflect.ClassTag
 import scala.util.control.NonFatal
 
@@ -146,20 +144,6 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging {
     }
     SparkSession.sqlListener.get()
   }
-
-  def addJar(path: String): Unit = {
-    sparkContext.addJar(path)
-    val uri = new Path(path).toUri
-    val jarURL = if (uri.getScheme == null) {
-      // `path` is a local file path without a URL scheme
-      new File(path).toURI.toURL
-    } else {
-      // `path` is a URL with a scheme
-      uri.toURL
-    }
-    jarClassLoader.addURL(jarURL)
-    Thread.currentThread().setContextClassLoader(jarClassLoader)
-  }
 }
 
 object SharedState {
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
index 9512b8b48eb32..6b7599e3d3401 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala
@@ -44,7 +44,6 @@ private[sql] class HiveSessionCatalog(
     externalCatalog: HiveExternalCatalog,
     globalTempViewManager: GlobalTempViewManager,
     private val metastoreCatalog: HiveMetastoreCatalog,
-    functionResourceLoader: FunctionResourceLoader,
     functionRegistry: FunctionRegistry,
     conf: SQLConf,
     hadoopConf: Configuration,
@@ -52,7 +51,6 @@ private[sql] class HiveSessionCatalog(
   extends SessionCatalog(
       externalCatalog,
       globalTempViewManager,
-      functionResourceLoader,
       functionRegistry,
       conf,
       hadoopConf,
@@ -72,16 +70,10 @@ private[sql] class HiveSessionCatalog(
   }
 
   /**
-   * Get an identical copy of the `HiveSessionCatalog`.
-   * The temporary tables and function registry are retained.
-   * The table relation cache will not be populated.
-   * @note `externalCatalog` and `globalTempViewManager` are from shared state, don't need
-   * deep copy. `FunctionResourceLoader` is effectively stateless, also does not need deep copy.
-   * All arguments passed in should be associated with `newSparkSession`.
-   * This should ideally override `SessionCatalog.clone()` but does not at present, since
-   * `HiveMetastoreCatalog` is dependent on `SparkSession`.
+   * Create a new [[HiveSessionCatalog]] with the provided parameters. `externalCatalog` and
+   * `globalTempViewManager` are `inherited`, while `currentDb` and `tempTables` are copied.
    */
-  def clone(
+  def newSessionCatalogWith(
       newSparkSession: SparkSession,
       conf: SQLConf,
       hadoopConf: Configuration,
@@ -89,7 +81,6 @@ private[sql] class HiveSessionCatalog(
       parser: ParserInterface): HiveSessionCatalog = {
     val catalog = HiveSessionCatalog(
       newSparkSession,
-      functionResourceLoader,
       functionRegistry,
       conf,
       hadoopConf,
@@ -104,11 +95,19 @@ private[sql] class HiveSessionCatalog(
     catalog
   }
 
-  override def clone(
-    conf: CatalystConf,
-    hadoopConf: Configuration,
-    functionRegistry: FunctionRegistry,
-    parser: ParserInterface): HiveSessionCatalog = throw new UnsupportedOperationException(
+  /**
+   * The parent class [[SessionCatalog]] cannot access the [[SparkSession]] class, so we cannot add
+   * a [[SparkSession]] parameter to [[SessionCatalog.newSessionCatalogWith]]. However,
+   * [[HiveSessionCatalog]] requires a [[SparkSession]] parameter, so we can a new version of
+   * `newSessionCatalogWith` and disable this one.
+   *
+   * TODO Refactor HiveSessionCatalog to not use [[SparkSession]] directly.
+   */
+  override def newSessionCatalogWith(
+      conf: CatalystConf,
+      hadoopConf: Configuration,
+      functionRegistry: FunctionRegistry,
+      parser: ParserInterface): HiveSessionCatalog = throw new UnsupportedOperationException(
     "to clone HiveSessionCatalog, use the other clone method that also accepts a SparkSession")
 
   // For testing only
@@ -256,7 +255,6 @@ private[sql] object HiveSessionCatalog {
 
   def apply(
       sparkSession: SparkSession,
-      functionResourceLoader: FunctionResourceLoader,
       functionRegistry: FunctionRegistry,
       conf: SQLConf,
       hadoopConf: Configuration,
@@ -271,7 +269,6 @@ private[sql] object HiveSessionCatalog {
       sparkSession.sharedState.externalCatalog.asInstanceOf[HiveExternalCatalog],
       sparkSession.sharedState.globalTempViewManager,
       metastoreCatalog,
-      functionResourceLoader,
       functionRegistry,
       conf,
       hadoopConf,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 1180ef70e2192..6e80ea58fe8c3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -32,10 +32,19 @@ import org.apache.spark.sql.streaming.StreamingQueryManager
 
 /**
  * A class that holds all session-specific state in a given [[SparkSession]] backed by Hive.
- * @param catalog A Hive client used for interacting with the metastore.
- * @param analyzer An analyzer that uses the Hive metastore.
- * @param plannerCreator Lambda to create a [[SparkPlanner]] that converts optimized logical
- *                       plans to physical plans.
+ * @param sparkContext The [[SparkContext]].
+ * @param sharedState The shared state.
+ * @param conf SQL-specific key-value configurations.
+ * @param experimentalMethods The experimental methods.
+ * @param functionRegistry Internal catalog for managing functions registered by the user.
+ * @param catalog Internal catalog for managing table and database states.
+ * @param sqlParser Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
+ * @param metadataHive The Hive metadata client.
+ * @param analyzer Logical query plan analyzer for resolving unresolved attributes and relations.
+ * @param streamingQueryManager Interface to start and stop
+ *                              [[org.apache.spark.sql.streaming.StreamingQuery]]s.
+ * @param queryExecutionCreator Lambda to create a [[QueryExecution]] from a [[LogicalPlan]]
+ * @param plannerCreator Lambda to create a planner that takes into account Hive-specific strategies
  */
 private[hive] class HiveSessionState(
     sparkContext: SparkContext,
@@ -123,7 +132,7 @@ private[hive] class HiveSessionState(
     val functionRegistryCopy = functionRegistry.clone()
     val experimentalMethodsCopy = experimentalMethods.clone()
     val sqlParser: ParserInterface = new SparkSqlParser(confCopy)
-    val catalogCopy = catalog.clone(
+    val catalogCopy = catalog.newSessionCatalogWith(
       newSparkSession,
       confCopy,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, confCopy),
@@ -170,7 +179,6 @@ object HiveSessionState {
 
     val catalog = HiveSessionCatalog(
       sparkSession,
-      SessionState.createFunctionResourceLoader(sparkContext, sparkSession.sharedState),
       initHelper.functionRegistry,
       initHelper.conf,
       SessionState.newHadoopConf(sparkContext.hadoopConfiguration, initHelper.conf),
@@ -187,7 +195,7 @@ object HiveSessionState {
       initHelper.conf,
       initHelper.experimentalMethods)
 
-    new HiveSessionState(
+    val hiveSessionState = new HiveSessionState(
       sparkContext,
       sparkSession.sharedState,
       initHelper.conf,
@@ -200,6 +208,8 @@ object HiveSessionState {
       initHelper.streamingQueryManager,
       initHelper.queryExecutionCreator,
       plannerCreator)
+    catalog.functionResourceLoader = hiveSessionState.functionResourceLoader
+    hiveSessionState
   }
 
   /**
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index 7ebdac2a2fe53..a6cfcb1b2d8a0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -38,7 +38,7 @@ class HiveSessionCatalogSuite extends SparkFunSuite {
     original.createTempView("copytest1", tempTable1, overrideIfExists = false)
 
     // check if tables copied over
-    val clone = original.clone(
+    val clone = original.newSessionCatalogWith(
       hiveSession,
       new SQLConf,
       new Configuration(),

From 5eb6733d7e49ff64bcd4d6fbd21acca1d4c3187f Mon Sep 17 00:00:00 2001
From: Shixiong Zhu <shixiong@databricks.com>
Date: Tue, 7 Mar 2017 11:02:28 -0800
Subject: [PATCH 30/31] More tests

---
 .../catalog/SessionCatalogSuite.scala         |  24 ++--
 .../sql/hive/HiveSessionCatalogSuite.scala    | 108 +++++++++++++-----
 2 files changed, 90 insertions(+), 42 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
index 84e95e64f18f8..52b8225f70604 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala
@@ -1212,11 +1212,11 @@ class SessionCatalogSuite extends PlanTest {
       new SimpleFunctionRegistry,
       CatalystSqlParser)
     assert(original ne clone)
-    assert(clone.getTempView("copytest1") == Option(tempTable1))
+    assert(clone.getTempView("copytest1") == Some(tempTable1))
 
     // check if clone and original independent
     clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
-    assert(original.getTempView("copytest1") == Option(tempTable1))
+    assert(original.getTempView("copytest1") == Some(tempTable1))
 
     val tempTable2 = Range(1, 20, 2, 10)
     original.createTempView("copytest2", tempTable2, overrideIfExists = false)
@@ -1225,14 +1225,15 @@ class SessionCatalogSuite extends PlanTest {
 
   test("clone SessionCatalog - current db") {
     val externalCatalog = newEmptyCatalog()
-    externalCatalog.createDatabase(newDb("copytest1"), true)
-    externalCatalog.createDatabase(newDb("copytest2"), true)
-    externalCatalog.createDatabase(newDb("copytest3"), true)
+    val db1 = "db1"
+    val db2 = "db2"
+    val db3 = "db3"
+
+    externalCatalog.createDatabase(newDb(db1), ignoreIfExists = true)
+    externalCatalog.createDatabase(newDb(db2), ignoreIfExists = true)
+    externalCatalog.createDatabase(newDb(db3), ignoreIfExists = true)
 
     val original = new SessionCatalog(externalCatalog)
-    val tempTable1 = Range(1, 10, 1, 10)
-    val db1 = "copytest1"
-    original.createTempView(db1, tempTable1, overrideIfExists = false)
     original.setCurrentDatabase(db1)
 
     // check if current db copied over
@@ -1245,15 +1246,8 @@ class SessionCatalogSuite extends PlanTest {
     assert(clone.getCurrentDatabase == db1)
 
     // check if clone and original independent
-    val db2 = "copytest2"
-    val tempTable2 = Range(1, 20, 2, 20)
-    clone.createTempView(db2, tempTable2, overrideIfExists = false)
     clone.setCurrentDatabase(db2)
     assert(original.getCurrentDatabase == db1)
-
-    val db3 = "copytest3"
-    val tempTable3 = Range(1, 30, 2, 30)
-    original.createTempView(db3, tempTable3, overrideIfExists = false)
     original.setCurrentDatabase(db3)
     assert(clone.getCurrentDatabase == db2)
   }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
index a6cfcb1b2d8a0..3b0f59b15916c 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSessionCatalogSuite.scala
@@ -17,42 +17,96 @@
 
 package org.apache.spark.sql.hive
 
+import java.net.URI
+
 import org.apache.hadoop.conf.Configuration
 
-import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.SimpleFunctionRegistry
+import org.apache.spark.sql.catalyst.catalog.CatalogDatabase
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.catalyst.plans.logical.Range
+import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.util.Utils
 
-class HiveSessionCatalogSuite extends SparkFunSuite {
+class HiveSessionCatalogSuite extends TestHiveSingleton {
 
   test("clone HiveSessionCatalog") {
-    val hiveSession = SparkSession.builder().master("local").enableHiveSupport().getOrCreate()
-    assert(hiveSession.sessionState.catalog.isInstanceOf[HiveSessionCatalog])
-    val original = hiveSession.sessionState.catalog.asInstanceOf[HiveSessionCatalog]
-
-    val tempTable1 = Range(1, 10, 1, 10)
-    original.createTempView("copytest1", tempTable1, overrideIfExists = false)
-
-    // check if tables copied over
-    val clone = original.newSessionCatalogWith(
-      hiveSession,
-      new SQLConf,
-      new Configuration(),
-      new SimpleFunctionRegistry,
-      CatalystSqlParser)
-    assert(original ne clone)
-    assert(clone.getTempView("copytest1") == Option(tempTable1))
-
-    // check if clone and original independent
-    clone.dropTable(TableIdentifier("copytest1"), ignoreIfNotExists = false, purge = false)
-    assert(original.getTempView("copytest1") == Option(tempTable1))
-
-    val tempTable2 = Range(1, 20, 2, 10)
-    original.createTempView("copytest2", tempTable2, overrideIfExists = false)
-    assert(clone.getTempView("copytest2").isEmpty)
+    val original = spark.sessionState.catalog.asInstanceOf[HiveSessionCatalog]
+
+    val tempTableName1 = "copytest1"
+    val tempTableName2 = "copytest2"
+    try {
+      val tempTable1 = Range(1, 10, 1, 10)
+      original.createTempView(tempTableName1, tempTable1, overrideIfExists = false)
+
+      // check if tables copied over
+      val clone = original.newSessionCatalogWith(
+        spark,
+        new SQLConf,
+        new Configuration(),
+        new SimpleFunctionRegistry,
+        CatalystSqlParser)
+      assert(original ne clone)
+      assert(clone.getTempView(tempTableName1) == Some(tempTable1))
+
+      // check if clone and original independent
+      clone.dropTable(TableIdentifier(tempTableName1), ignoreIfNotExists = false, purge = false)
+      assert(original.getTempView(tempTableName1) == Some(tempTable1))
+
+      val tempTable2 = Range(1, 20, 2, 10)
+      original.createTempView(tempTableName2, tempTable2, overrideIfExists = false)
+      assert(clone.getTempView(tempTableName2).isEmpty)
+    } finally {
+      // Drop the created temp views from the global singleton HiveSession.
+      original.dropTable(TableIdentifier(tempTableName1), ignoreIfNotExists = true, purge = true)
+      original.dropTable(TableIdentifier(tempTableName2), ignoreIfNotExists = true, purge = true)
+    }
+  }
+
+  test("clone SessionCatalog - current db") {
+    val original = spark.sessionState.catalog.asInstanceOf[HiveSessionCatalog]
+    val originalCurrentDatabase = original.getCurrentDatabase
+    val db1 = "db1"
+    val db2 = "db2"
+    val db3 = "db3"
+    try {
+      original.createDatabase(newDb(db1), ignoreIfExists = true)
+      original.createDatabase(newDb(db2), ignoreIfExists = true)
+      original.createDatabase(newDb(db3), ignoreIfExists = true)
+
+      original.setCurrentDatabase(db1)
+
+      // check if tables copied over
+      val clone = original.newSessionCatalogWith(
+        spark,
+        new SQLConf,
+        new Configuration(),
+        new SimpleFunctionRegistry,
+        CatalystSqlParser)
+
+      // check if current db copied over
+      assert(original ne clone)
+      assert(clone.getCurrentDatabase == db1)
+
+      // check if clone and original independent
+      clone.setCurrentDatabase(db2)
+      assert(original.getCurrentDatabase == db1)
+      original.setCurrentDatabase(db3)
+      assert(clone.getCurrentDatabase == db2)
+    } finally {
+      // Drop the created databases from the global singleton HiveSession.
+      original.dropDatabase(db1, ignoreIfNotExists = true, cascade = true)
+      original.dropDatabase(db2, ignoreIfNotExists = true, cascade = true)
+      original.dropDatabase(db3, ignoreIfNotExists = true, cascade = true)
+      original.setCurrentDatabase(originalCurrentDatabase)
+    }
+  }
+
+  def newUriForDatabase(): URI = new URI(Utils.createTempDir().toURI.toString.stripSuffix("/"))
+
+  def newDb(name: String): CatalogDatabase = {
+    CatalogDatabase(name, name + " description", newUriForDatabase(), Map.empty)
   }
 }

From 05abcf801f57c861435600fcc5fb2cecf6f3e11f Mon Sep 17 00:00:00 2001
From: Kunal Khamar <kkhamar@outlook.com>
Date: Tue, 7 Mar 2017 14:01:16 -0800
Subject: [PATCH 31/31] Update tests and a param comment.

---
 .../org/apache/spark/sql/SessionStateSuite.scala      | 11 ++++++-----
 .../org/apache/spark/sql/hive/HiveSessionState.scala  |  3 ++-
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
index d7966690ab55d..2d5e37242a58b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SessionStateSuite.scala
@@ -48,8 +48,9 @@ class SessionStateSuite extends SparkFunSuite
 
   test("fork new session and inherit RuntimeConfig options") {
     val key = "spark-config-clone"
-    activeSession.conf.set(key, "active")
     try {
+      activeSession.conf.set(key, "active")
+
       // inheritance
       val forkedSession = activeSession.cloneSession()
       assert(forkedSession ne activeSession)
@@ -136,11 +137,11 @@ class SessionStateSuite extends SparkFunSuite
     // Cannot use `import activeSession.implicits._` due to the compiler limitation.
     import spark.implicits._
 
-    activeSession
-      .createDataset[(Int, String)](Seq(1, 2, 3).map(i => (i, i.toString)))
-      .toDF("int", "str")
-      .createOrReplaceTempView("df")
     try {
+      activeSession
+        .createDataset[(Int, String)](Seq(1, 2, 3).map(i => (i, i.toString)))
+        .toDF("int", "str")
+        .createOrReplaceTempView("df")
       checkTableExists(activeSession)
 
       val forkedSession = activeSession.cloneSession()
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
index 6e80ea58fe8c3..a6647985dbb90 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala
@@ -37,7 +37,8 @@ import org.apache.spark.sql.streaming.StreamingQueryManager
  * @param conf SQL-specific key-value configurations.
  * @param experimentalMethods The experimental methods.
  * @param functionRegistry Internal catalog for managing functions registered by the user.
- * @param catalog Internal catalog for managing table and database states.
+ * @param catalog Internal catalog for managing table and database states that uses Hive client for
+ *                interacting with the metastore.
  * @param sqlParser Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
  * @param metadataHive The Hive metadata client.
  * @param analyzer Logical query plan analyzer for resolving unresolved attributes and relations.