From 7d001843f6a161298702aaf20079f1611b18141b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 27 Mar 2016 13:15:54 +0000 Subject: [PATCH 01/38] init import. --- .../spark/sql/execution/command/ddl.scala | 66 ++++++++++++++++++- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 0e51abb44b91d..63b18c27d3247 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging -import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.{AnalysisException, Row, SQLContext} +import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec @@ -86,13 +87,49 @@ case class DescribeDatabase( extended: Boolean)(sql: String) extends NativeDDLCommand(sql) with Logging +/** + * The DDL command that creates a function. + * alias: the class name that implements the created function. + * resources: Jars, files, or archives which need to be added to the environment when the function + * is referenced for the first time by a session. + * isTemp: indicates if it is a temporary function. + */ case class CreateFunction( databaseName: Option[String], functionName: String, alias: String, resources: Seq[(String, String)], isTemp: Boolean)(sql: String) - extends NativeDDLCommand(sql) with Logging + extends NativeDDLCommand(sql) with Logging { + + override def run(sqlContext: SQLContext): Seq[Row] = { + val func = FunctionIdentifier(functionName, databaseName) + val catalogFunc = CatalogFunction(func, alias) + if (isTemp) { + println(s"CreateFunction: $func, $catalogFunc") + // Set `ignoreIfExists` to false, so if the temporary function already exists, + // an exception will be thrown. + sqlContext.sessionState.catalog.createTempFunction(catalogFunc, false) + } else { + // Check if the function to create is already existing. If so, throw exception. + var funcExisting: Boolean = true + try { + if (sqlContext.sessionState.catalog.getFunction(func) == null) { + funcExisting = false + } + } catch { + case _: NoSuchFunctionException => funcExisting = false + } + if (funcExisting) { + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + throw new AnalysisException( + s"Function '$functionName' already exists in database '$dbName'.") + } + sqlContext.sessionState.catalog.createFunction(catalogFunc) + } + Seq.empty[Row] + } +} /** * The DDL command that drops a function. 
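For orientation, the SQL these new CreateFunction/DropFunction run() methods are meant to back can be exercised through sql(...) calls, as the test suites later in this series do. The sketch below is illustrative only: the function name, database, and jar path are placeholders; only the UDTF class name is taken from the tests further down.

```scala
// Temporary function: lives only in the current session's function registry.
sql("CREATE TEMPORARY FUNCTION my_count2 AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2'")
sql("DROP TEMPORARY FUNCTION IF EXISTS my_count2")

// Permanent function: persisted through the SessionCatalog, optionally with resources
// (jars/files/archives) that are added when the function is first referenced.
sql(
  """CREATE FUNCTION my_db.my_count2
    |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2'
    |USING JAR '/tmp/TestUDTF.jar'
  """.stripMargin)
sql("DROP FUNCTION my_db.my_count2")
```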
@@ -104,7 +141,30 @@ case class DropFunction( functionName: String, ifExists: Boolean, isTemp: Boolean)(sql: String) - extends NativeDDLCommand(sql) with Logging + extends NativeDDLCommand(sql) with Logging { + + override def run(sqlContext: SQLContext): Seq[Row] = { + println("call DropFunction") + if (isTemp) { + println("isTemp") + require(databaseName.isEmpty, + "attempted to drop a temporary function while specifying a database") + sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) + } else { + val func = FunctionIdentifier(functionName, databaseName) + if (!ifExists) { + // getFunction can throw NoSuchFunctionException itself, or return null. + val existingFunc = sqlContext.sessionState.catalog.getFunction(func) + if (existingFunc == null) { + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + throw new NoSuchFunctionException(dbName, functionName) + } + } + sqlContext.sessionState.catalog.dropFunction(func) + } + Seq.empty[Row] + } +} case class AlterTableRename( oldName: TableIdentifier, From 35ad7ae14ebff693a8e15dd231eb69be7061402f Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Mar 2016 12:10:19 +0000 Subject: [PATCH 02/38] Fix hive temp function feature. --- .../catalyst/analysis/FunctionRegistry.scala | 44 ++++++--- .../spark/sql/execution/command/ddl.scala | 6 +- .../org/apache/spark/sql/hive/hiveUDFs.scala | 95 +++++++++++++------ 3 files changed, 101 insertions(+), 44 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e9788b7e4dd51..96a2778b86f5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.util.StringKeyHashMap +import org.apache.spark.util.Utils /** A catalog for looking up user defined functions, used by an [[Analyzer]]. */ @@ -52,6 +53,14 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean + /** Get the builder of the specified function name and class name. */ + def getFunctionBuilder( + name: String, + functionClassName: String): FunctionBuilder = { + val clazz = Utils.getContextOrSparkClassLoader.loadClass(functionClassName) + val (_, (_, builder)) = FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) + builder + } } class SimpleFunctionRegistry extends FunctionRegistry { @@ -67,9 +76,14 @@ class SimpleFunctionRegistry extends FunctionRegistry { } override def lookupFunction(name: String, children: Seq[Expression]): Expression = { + val builder = functionBuilders.get(name) + if (builder.isEmpty) { + throw new AnalysisException(s"undefined function $name") + } val func = synchronized { - functionBuilders.get(name).map(_._2).getOrElse { - throw new AnalysisException(s"undefined function $name") + Try(builder.map(_._2)) match { + case Success(e) => e.get + case Failure(e) => throw new AnalysisException(e.getMessage) } } func(children) @@ -337,13 +351,12 @@ object FunctionRegistry { fr } - /** See usage above. 
*/ - def expression[T <: Expression](name: String) - (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { - + def expression[T <: Expression]( + name: String, + runtimeClass: Class[T]): (String, (ExpressionInfo, FunctionBuilder)) = { // See if we can find a constructor that accepts Seq[Expression] - val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption - val builder = (expressions: Seq[Expression]) => { + val varargCtor = Try(runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption + val builder: FunctionBuilder = (expressions: Seq[Expression]) => { if (varargCtor.isDefined) { // If there is an apply method that accepts Seq[Expression], use that one. Try(varargCtor.get.newInstance(expressions).asInstanceOf[Expression]) match { @@ -353,7 +366,7 @@ object FunctionRegistry { } else { // Otherwise, find an ctor method that matches the number of arguments, and use that. val params = Seq.fill(expressions.size)(classOf[Expression]) - val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match { + val f = Try(runtimeClass.getDeclaredConstructor(params : _*)) match { case Success(e) => e case Failure(e) => @@ -366,14 +379,19 @@ object FunctionRegistry { } } - val clazz = tag.runtimeClass - val df = clazz.getAnnotation(classOf[ExpressionDescription]) + val df = runtimeClass.getAnnotation(classOf[ExpressionDescription]) if (df != null) { (name, - (new ExpressionInfo(clazz.getCanonicalName, name, df.usage(), df.extended()), + (new ExpressionInfo(runtimeClass.getCanonicalName, name, df.usage(), df.extended()), builder)) } else { - (name, (new ExpressionInfo(clazz.getCanonicalName, name), builder)) + (name, (new ExpressionInfo(runtimeClass.getCanonicalName, name), builder)) } } + + /** See usage above. */ + def expression[T <: Expression](name: String) + (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { + expression(name, tag.runtimeClass.asInstanceOf[Class[T]]) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 63b18c27d3247..58e0f3388199b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -106,10 +106,10 @@ case class CreateFunction( val func = FunctionIdentifier(functionName, databaseName) val catalogFunc = CatalogFunction(func, alias) if (isTemp) { - println(s"CreateFunction: $func, $catalogFunc") // Set `ignoreIfExists` to false, so if the temporary function already exists, // an exception will be thrown. - sqlContext.sessionState.catalog.createTempFunction(catalogFunc, false) + val builder = sqlContext.sessionState.functionRegistry.getFunctionBuilder(functionName, alias) + sqlContext.sessionState.catalog.createTempFunction(functionName, builder, false) } else { // Check if the function to create is already existing. If so, throw exception. 
var funcExisting: Boolean = true @@ -144,9 +144,7 @@ case class DropFunction( extends NativeDDLCommand(sql) with Logging { override def run(sqlContext: SQLContext): Seq[Row] = { - println("call DropFunction") if (isTemp) { - println("isTemp") require(databaseName.isEmpty, "attempted to drop a temporary function while specifying a database") sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index c07c428895c26..d2462bff66e05 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -55,61 +55,102 @@ private[hive] class HiveFunctionRegistry( } } - override def lookupFunction(name: String, children: Seq[Expression]): Expression = { - Try(underlying.lookupFunction(name, children)).getOrElse { - // We only look it up to see if it exists, but do not include it in the HiveUDF since it is - // not always serializable. - val functionInfo: FunctionInfo = - Option(getFunctionInfo(name.toLowerCase)).getOrElse( - throw new AnalysisException(s"undefined function $name")) + override def getFunctionBuilder( + name: String, + functionClassName: String): FunctionBuilder = { + val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) + val hiveUDFClass = hiveUDFWrapper.createFunction().getClass + genHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) + } - val functionClassName = functionInfo.getFunctionClass.getName + /** + * Generates a Spark FunctionBuilder for a Hive UDF which is specified by a given classname. + */ + def genHiveUDFBuilder( + name: String, + functionClassName: String, + hiveUDFClass: Class[_], + functionInfo: FunctionInfo = null, + wrapper: HiveFunctionWrapper = null): FunctionBuilder = { + val hiveUDFWrapper = + if (wrapper == null) { + if (functionInfo == null) { + new HiveFunctionWrapper(functionClassName) + } else { + new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF) + } + } else { + wrapper + } - // When we instantiate hive UDF wrapper class, we may throw exception if the input expressions - // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we - // catch the exception and throw AnalysisException instead. + val builder = (children: Seq[Expression]) => { try { - if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { + if (classOf[GenericUDFMacro].isAssignableFrom(hiveUDFClass)) { val udf = HiveGenericUDF( - name, new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF), children) + name, hiveUDFWrapper, children) udf.dataType // Force it to check input data types. udf - } else if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) { - val udf = HiveSimpleUDF(name, new HiveFunctionWrapper(functionClassName), children) + } else if (classOf[UDF].isAssignableFrom(hiveUDFClass)) { + val udf = HiveSimpleUDF(name, hiveUDFWrapper, children) udf.dataType // Force it to check input data types. udf - } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) { - val udf = HiveGenericUDF(name, new HiveFunctionWrapper(functionClassName), children) + } else if (classOf[GenericUDF].isAssignableFrom(hiveUDFClass)) { + val udf = HiveGenericUDF(name, hiveUDFWrapper, children) udf.dataType // Force it to check input data types. 
udf } else if ( - classOf[AbstractGenericUDAFResolver].isAssignableFrom(functionInfo.getFunctionClass)) { - val udaf = HiveUDAFFunction(name, new HiveFunctionWrapper(functionClassName), children) + classOf[AbstractGenericUDAFResolver].isAssignableFrom(hiveUDFClass)) { + val udaf = HiveUDAFFunction(name, hiveUDFWrapper, children) udaf.dataType // Force it to check input data types. udaf - } else if (classOf[UDAF].isAssignableFrom(functionInfo.getFunctionClass)) { + } else if (classOf[UDAF].isAssignableFrom(hiveUDFClass)) { val udaf = HiveUDAFFunction( - name, new HiveFunctionWrapper(functionClassName), children, isUDAFBridgeRequired = true) + name, hiveUDFWrapper, children, isUDAFBridgeRequired = true) udaf.dataType // Force it to check input data types. udaf - } else if (classOf[GenericUDTF].isAssignableFrom(functionInfo.getFunctionClass)) { - val udtf = HiveGenericUDTF(name, new HiveFunctionWrapper(functionClassName), children) + } else if (classOf[GenericUDTF].isAssignableFrom(hiveUDFClass)) { + val udtf = HiveGenericUDTF(name, hiveUDFWrapper, children) udtf.elementTypes // Force it to check input data types. udtf } else { - throw new AnalysisException(s"No handler for udf ${functionInfo.getFunctionClass}") + throw new AnalysisException(s"No handler for udf ${hiveUDFClass}") } } catch { case analysisException: AnalysisException => - // If the exception is an AnalysisException, just throw it. throw analysisException case throwable: Throwable => - // If there is any other error, we throw an AnalysisException. - val errorMessage = s"No handler for Hive udf ${functionInfo.getFunctionClass} " + + val errorMessage = s"No handler for Hive udf ${hiveUDFClass} " + s"because: ${throwable.getMessage}." throw new AnalysisException(errorMessage) } } + builder + } + + override def lookupFunction(name: String, children: Seq[Expression]): Expression = { + val builder = underlying.lookupFunctionBuilder(name) + if (builder.isDefined) { + builder.get(children) + } else { + // We only look it up to see if it exists, but do not include it in the HiveUDF since it is + // not always serializable. + val functionInfo: FunctionInfo = + Option(getFunctionInfo(name.toLowerCase)).getOrElse( + throw new AnalysisException(s"undefined function $name")) + + val functionClassName = functionInfo.getFunctionClass.getName + + // When we instantiate hive UDF wrapper class, we may throw exception if the input expressions + // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we + // catch the exception and throw AnalysisException instead. + val builder = + if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { + genHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) + } else { + genHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) + } + builder(children) + } } override def registerFunction(name: String, info: ExpressionInfo, builder: FunctionBuilder) From cb29f0fb9d66b3bf3774dd9f76a82af365c39deb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Mar 2016 13:08:37 +0000 Subject: [PATCH 03/38] Fix scala style. 
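Taken together, the getFunctionBuilder hook and the createTempFunction call introduced in the commit above give temporary functions roughly the following flow. This is a readability sketch, not a verbatim excerpt from the patch, and 'com.example.MyExpr' stands in for a real Expression subclass.

```scala
// CREATE TEMPORARY FUNCTION f AS 'com.example.MyExpr' now roughly does:
val className = "com.example.MyExpr"   // placeholder Expression implementation
val builder =
  sqlContext.sessionState.functionRegistry.getFunctionBuilder("f", className)
sqlContext.sessionState.catalog.createTempFunction("f", builder, ignoreIfExists = false)

// A later SELECT f(x) then resolves through
//   functionRegistry.lookupFunction("f", Seq(x))
// which invokes builder(Seq(x)) to construct the Expression.
// HiveFunctionRegistry overrides getFunctionBuilder to wrap Hive UDF/UDAF/UDTF classes instead.
```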
--- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../main/scala/org/apache/spark/sql/execution/command/ddl.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 96a2778b86f5c..1567799a29ee6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -388,7 +388,7 @@ object FunctionRegistry { (name, (new ExpressionInfo(runtimeClass.getCanonicalName, name), builder)) } } - + /** See usage above. */ def expression[T <: Expression](name: String) (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 58e0f3388199b..fd0702be2a0cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row, SQLContext} -import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} +import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} From 77848c910a78165416e7c069b25eaaee1b9c5a93 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Mar 2016 14:02:27 +0000 Subject: [PATCH 04/38] Use HiveFunctionRegistry to find Generator function in HiveSqlParser. --- .../spark/sql/hive/HiveSessionState.scala | 2 +- .../sql/hive/execution/HiveSqlParser.scala | 19 ++++++++------- .../org/apache/spark/sql/hive/hiveUDFs.scala | 24 ++++++++++++++----- 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 11ef0fd1bbbee..d2d20027c2fae 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -71,7 +71,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) /** * Parser for HiveQl query texts. */ - override lazy val sqlParser: ParserInterface = HiveSqlParser + override lazy val sqlParser: ParserInterface = HiveSqlParser(this) /** * Planner that takes into account Hive-specific strategies. 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index d6a08fcc57252..a770c9cd074ca 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -21,27 +21,28 @@ import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.ql.exec.FunctionRegistry +import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry} import org.apache.hadoop.hive.ql.parse.EximUtil import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser.ng._ import org.apache.spark.sql.catalyst.parser.ng.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView} +import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView, HiveSessionState} import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveSerDe} import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper /** * Concrete parser for HiveQl statements. */ -object HiveSqlParser extends AbstractSqlParser { - val astBuilder = new HiveSqlAstBuilder +case class HiveSqlParser(sessionState: HiveSessionState) extends AbstractSqlParser { + val astBuilder = new HiveSqlAstBuilder(sessionState) override protected def nativeCommand(sqlText: String): LogicalPlan = { HiveNativeCommand(sqlText) @@ -51,7 +52,7 @@ object HiveSqlParser extends AbstractSqlParser { /** * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. 
*/ -class HiveSqlAstBuilder extends SparkSqlAstBuilder { +class HiveSqlAstBuilder(sessionState: HiveSessionState) extends SparkSqlAstBuilder { import ParserUtils._ /** @@ -280,10 +281,12 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { name: String, expressions: Seq[Expression], ctx: LateralViewContext): Generator = { - val info = Option(FunctionRegistry.getFunctionInfo(name.toLowerCase)).getOrElse { - throw new ParseException(s"Couldn't find Generator function '$name'", ctx) + val functionRegistry = sessionState.functionRegistry + val func = functionRegistry.lookupFunction(name, expressions) + func match { + case g: Generator => g + case _ => throw new ParseException(s"Couldn't find Generator function '$name'", ctx) } - HiveGenericUDTF(name, new HiveFunctionWrapper(info.getFunctionClass.getName), expressions) } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index d2462bff66e05..6e4a06bf9a853 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -88,29 +88,41 @@ private[hive] class HiveFunctionRegistry( if (classOf[GenericUDFMacro].isAssignableFrom(hiveUDFClass)) { val udf = HiveGenericUDF( name, hiveUDFWrapper, children) - udf.dataType // Force it to check input data types. + if (udf.resolved) { + udf.dataType // Force it to check input data types. + } udf } else if (classOf[UDF].isAssignableFrom(hiveUDFClass)) { val udf = HiveSimpleUDF(name, hiveUDFWrapper, children) - udf.dataType // Force it to check input data types. + if (udf.resolved) { + udf.dataType // Force it to check input data types. + } udf } else if (classOf[GenericUDF].isAssignableFrom(hiveUDFClass)) { val udf = HiveGenericUDF(name, hiveUDFWrapper, children) - udf.dataType // Force it to check input data types. + if (udf.resolved) { + udf.dataType // Force it to check input data types. + } udf } else if ( classOf[AbstractGenericUDAFResolver].isAssignableFrom(hiveUDFClass)) { val udaf = HiveUDAFFunction(name, hiveUDFWrapper, children) - udaf.dataType // Force it to check input data types. + if (udaf.resolved) { + udaf.dataType // Force it to check input data types. + } udaf } else if (classOf[UDAF].isAssignableFrom(hiveUDFClass)) { val udaf = HiveUDAFFunction( name, hiveUDFWrapper, children, isUDAFBridgeRequired = true) - udaf.dataType // Force it to check input data types. + if (udaf.resolved) { + udaf.dataType // Force it to check input data types. + } udaf } else if (classOf[GenericUDTF].isAssignableFrom(hiveUDFClass)) { val udtf = HiveGenericUDTF(name, hiveUDFWrapper, children) - udtf.elementTypes // Force it to check input data types. + if (udtf.resolved) { + udtf.elementTypes // Force it to check input data types. + } udtf } else { throw new AnalysisException(s"No handler for udf ${hiveUDFClass}") From b8dda845dde32202c60db668270e73a0d3513309 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Mar 2016 15:34:50 +0000 Subject: [PATCH 05/38] Fix describe function problem. 
--- .../catalyst/analysis/FunctionRegistry.scala | 8 +++--- .../sql/catalyst/catalog/SessionCatalog.scala | 3 +- .../catalog/SessionCatalogSuite.scala | 28 ++++++++++++------- .../spark/sql/execution/command/ddl.scala | 5 ++-- .../org/apache/spark/sql/hive/hiveUDFs.scala | 8 ++++-- 5 files changed, 32 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 1567799a29ee6..ed227c1eaec5b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -54,12 +54,12 @@ trait FunctionRegistry { def dropFunction(name: String): Boolean /** Get the builder of the specified function name and class name. */ - def getFunctionBuilder( + def getFunctionBuilderAndInfo( name: String, - functionClassName: String): FunctionBuilder = { + functionClassName: String): (ExpressionInfo, FunctionBuilder) = { val clazz = Utils.getContextOrSparkClassLoader.loadClass(functionClassName) - val (_, (_, builder)) = FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) - builder + val (_, (info, builder)) = FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) + (info, builder) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 7165db1d5d2ad..ee39b4c470989 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -463,12 +463,13 @@ class SessionCatalog( */ def createTempFunction( name: String, + info: ExpressionInfo, funcDefinition: FunctionBuilder, ignoreIfExists: Boolean): Unit = { if (functionRegistry.lookupFunctionBuilder(name).isDefined && !ignoreIfExists) { throw new AnalysisException(s"Temporary function '$name' already exists.") } - functionRegistry.registerFunction(name, funcDefinition) + functionRegistry.registerFunction(name, info, funcDefinition) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index acd97592b6a6b..9282f86b32cf6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.SparkFunSuite import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Literal} import org.apache.spark.sql.catalyst.plans.logical.{Range, SubqueryAlias} @@ -685,18 +685,21 @@ class SessionCatalogSuite extends SparkFunSuite { val catalog = new SessionCatalog(newBasicCatalog()) val tempFunc1 = (e: Seq[Expression]) => e.head val tempFunc2 = (e: Seq[Expression]) => e.last - catalog.createTempFunction("temp1", tempFunc1, ignoreIfExists = false) - catalog.createTempFunction("temp2", tempFunc2, ignoreIfExists = 
false) + val info1 = new ExpressionInfo("tempFunc1", "temp1") + val info2 = new ExpressionInfo("tempFunc2", "temp2") + catalog.createTempFunction("temp1", info1, tempFunc1, ignoreIfExists = false) + catalog.createTempFunction("temp2", info2, tempFunc2, ignoreIfExists = false) assert(catalog.getTempFunction("temp1") == Some(tempFunc1)) assert(catalog.getTempFunction("temp2") == Some(tempFunc2)) assert(catalog.getTempFunction("temp3") == None) val tempFunc3 = (e: Seq[Expression]) => Literal(e.size) + val info3 = new ExpressionInfo("tempFunc3", "temp1") // Temporary function already exists intercept[AnalysisException] { - catalog.createTempFunction("temp1", tempFunc3, ignoreIfExists = false) + catalog.createTempFunction("temp1", info3, tempFunc3, ignoreIfExists = false) } // Temporary function is overridden - catalog.createTempFunction("temp1", tempFunc3, ignoreIfExists = true) + catalog.createTempFunction("temp1", info3, tempFunc3, ignoreIfExists = true) assert(catalog.getTempFunction("temp1") == Some(tempFunc3)) } @@ -726,8 +729,9 @@ class SessionCatalogSuite extends SparkFunSuite { test("drop temp function") { val catalog = new SessionCatalog(newBasicCatalog()) + val info = new ExpressionInfo("tempFunc", "func1") val tempFunc = (e: Seq[Expression]) => e.head - catalog.createTempFunction("func1", tempFunc, ignoreIfExists = false) + catalog.createTempFunction("func1", info, tempFunc, ignoreIfExists = false) assert(catalog.getTempFunction("func1") == Some(tempFunc)) catalog.dropTempFunction("func1", ignoreIfNotExists = false) assert(catalog.getTempFunction("func1") == None) @@ -758,8 +762,9 @@ class SessionCatalogSuite extends SparkFunSuite { test("lookup temp function") { val catalog = new SessionCatalog(newBasicCatalog()) + val info1 = new ExpressionInfo("tempFunc1", "func1") val tempFunc1 = (e: Seq[Expression]) => e.head - catalog.createTempFunction("func1", tempFunc1, ignoreIfExists = false) + catalog.createTempFunction("func1", info1, tempFunc1, ignoreIfExists = false) assert(catalog.lookupFunction("func1", Seq(Literal(1), Literal(2), Literal(3))) == Literal(1)) catalog.dropTempFunction("func1", ignoreIfNotExists = false) intercept[AnalysisException] { @@ -809,8 +814,9 @@ class SessionCatalogSuite extends SparkFunSuite { test("rename temp function") { val externalCatalog = newBasicCatalog() val sessionCatalog = new SessionCatalog(externalCatalog) + val info = new ExpressionInfo("tempFunc", "func1") val tempFunc = (e: Seq[Expression]) => e.head - sessionCatalog.createTempFunction("func1", tempFunc, ignoreIfExists = false) + sessionCatalog.createTempFunction("func1", info, tempFunc, ignoreIfExists = false) sessionCatalog.setCurrentDatabase("db2") // If a database is specified, we'll always rename the function in that database sessionCatalog.renameFunction( @@ -853,12 +859,14 @@ class SessionCatalogSuite extends SparkFunSuite { test("list functions") { val catalog = new SessionCatalog(newBasicCatalog()) + val info1 = new ExpressionInfo("tempFunc1", "func1") + val info2 = new ExpressionInfo("tempFunc2", "yes_me") val tempFunc1 = (e: Seq[Expression]) => e.head val tempFunc2 = (e: Seq[Expression]) => e.last catalog.createFunction(newFunc("func2", Some("db2"))) catalog.createFunction(newFunc("not_me", Some("db2"))) - catalog.createTempFunction("func1", tempFunc1, ignoreIfExists = false) - catalog.createTempFunction("yes_me", tempFunc2, ignoreIfExists = false) + catalog.createTempFunction("func1", info1, tempFunc1, ignoreIfExists = false) + catalog.createTempFunction("yes_me", info2, tempFunc2, 
ignoreIfExists = false) assert(catalog.listFunctions("db1", "*").toSet == Set(FunctionIdentifier("func1"), FunctionIdentifier("yes_me"))) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index fd0702be2a0cf..0e5ce0f9f452e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -108,8 +108,9 @@ case class CreateFunction( if (isTemp) { // Set `ignoreIfExists` to false, so if the temporary function already exists, // an exception will be thrown. - val builder = sqlContext.sessionState.functionRegistry.getFunctionBuilder(functionName, alias) - sqlContext.sessionState.catalog.createTempFunction(functionName, builder, false) + val (info, builder) = + sqlContext.sessionState.functionRegistry.getFunctionBuilderAndInfo(functionName, alias) + sqlContext.sessionState.catalog.createTempFunction(functionName, info, builder, false) } else { // Check if the function to create is already existing. If so, throw exception. var funcExisting: Boolean = true diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 6e4a06bf9a853..c3f09db4870c0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -55,12 +55,14 @@ private[hive] class HiveFunctionRegistry( } } - override def getFunctionBuilder( + override def getFunctionBuilderAndInfo( name: String, - functionClassName: String): FunctionBuilder = { + functionClassName: String): (ExpressionInfo, FunctionBuilder) = { val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) val hiveUDFClass = hiveUDFWrapper.createFunction().getClass - genHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) + val info = new ExpressionInfo(functionClassName, name) + val builder = genHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) + (info, builder) } /** From ee957db463300c0b3f9e5194d9400d598b29509d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Mar 2016 23:10:54 +0000 Subject: [PATCH 06/38] Fix scala style. --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index ed227c1eaec5b..90f6fe4560fc1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -58,7 +58,8 @@ trait FunctionRegistry { name: String, functionClassName: String): (ExpressionInfo, FunctionBuilder) = { val clazz = Utils.getContextOrSparkClassLoader.loadClass(functionClassName) - val (_, (info, builder)) = FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) + val (_, (info, builder)) = + FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) (info, builder) } } From 133ce1a35d5d7f0f7461be723fbc8bf6db73a5f2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Mar 2016 02:34:56 +0000 Subject: [PATCH 07/38] Fix test. 
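The SessionCatalogSuite changes above show the registration shape after this commit: a temporary function is now stored together with an ExpressionInfo, which is what the "Fix describe function problem" subject refers to. A condensed sketch follows; the class name and function name are placeholders, and catalog is a SessionCatalog as in the suite.

```scala
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo, Literal}

val info = new ExpressionInfo("com.example.TempFunc", "temp_len")    // placeholder class/name
val builder = (children: Seq[Expression]) => Literal(children.size)  // toy builder, as in the tests
catalog.createTempFunction("temp_len", info, builder, ignoreIfExists = false)
// The stored ExpressionInfo is what DESCRIBE FUNCTION can now draw on.
```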
--- .../org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 79774f5913900..7bbbcddb81d3d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -55,6 +55,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting Locale.setDefault(Locale.US) + sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2") } override def afterAll() { @@ -62,7 +63,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { TestHive.cacheTables = false TimeZone.setDefault(originalTimeZone) Locale.setDefault(originalLocale) - sql("DROP TEMPORARY FUNCTION udtf_count2") + sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2") } finally { super.afterAll() } From e05b108f9aeedf5d8e3fb5c7145940619811a858 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Mar 2016 03:21:25 +0000 Subject: [PATCH 08/38] Address some comments. --- .../sql/catalyst/analysis/FunctionRegistry.scala | 4 ++-- .../apache/spark/sql/execution/command/ddl.scala | 14 ++++++-------- .../apache/spark/sql/hive/HiveSessionState.scala | 2 +- .../spark/sql/hive/execution/HiveSqlParser.scala | 7 +++---- .../scala/org/apache/spark/sql/hive/hiveUDFs.scala | 10 +++++----- 5 files changed, 17 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 90f6fe4560fc1..a45c46512a94d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -53,8 +53,8 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean - /** Get the builder of the specified function name and class name. */ - def getFunctionBuilderAndInfo( + /* Return the FunctionBuilder and ExpressionInfo for the specified function name and classname. */ + def makeFunctionBuilderAndInfo( name: String, functionClassName: String): (ExpressionInfo, FunctionBuilder) = { val clazz = Utils.getContextOrSparkClassLoader.loadClass(functionClassName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index c8ab656f234e0..c46bca78342af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -198,18 +198,16 @@ case class CreateFunction( // Set `ignoreIfExists` to false, so if the temporary function already exists, // an exception will be thrown. val (info, builder) = - sqlContext.sessionState.functionRegistry.getFunctionBuilderAndInfo(functionName, alias) + sqlContext.sessionState.functionRegistry.makeFunctionBuilderAndInfo(functionName, alias) sqlContext.sessionState.catalog.createTempFunction(functionName, info, builder, false) } else { // Check if the function to create is already existing. If so, throw exception. 
- var funcExisting: Boolean = true - try { - if (sqlContext.sessionState.catalog.getFunction(func) == null) { - funcExisting = false + var funcExisting: Boolean = + try { + sqlContext.sessionState.catalog.getFunction(func) != null + } catch { + case _: NoSuchFunctionException => false } - } catch { - case _: NoSuchFunctionException => funcExisting = false - } if (funcExisting) { val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) throw new AnalysisException( diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index d2d20027c2fae..86c486e1ed98b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -71,7 +71,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) /** * Parser for HiveQl query texts. */ - override lazy val sqlParser: ParserInterface = HiveSqlParser(this) + override lazy val sqlParser: ParserInterface = new HiveSqlParser(functionRegistry) /** * Planner that takes into account Hive-specific strategies. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index a770c9cd074ca..f88294e535c0b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -41,8 +41,8 @@ import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper /** * Concrete parser for HiveQl statements. */ -case class HiveSqlParser(sessionState: HiveSessionState) extends AbstractSqlParser { - val astBuilder = new HiveSqlAstBuilder(sessionState) +class HiveSqlParser(functionRegistry: FunctionRegistry) extends AbstractSqlParser { + val astBuilder = new HiveSqlAstBuilder(functionRegistry) override protected def nativeCommand(sqlText: String): LogicalPlan = { HiveNativeCommand(sqlText) @@ -52,7 +52,7 @@ case class HiveSqlParser(sessionState: HiveSessionState) extends AbstractSqlPars /** * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. 
*/ -class HiveSqlAstBuilder(sessionState: HiveSessionState) extends SparkSqlAstBuilder { +class HiveSqlAstBuilder(functionRegistry: FunctionRegistry) extends SparkSqlAstBuilder { import ParserUtils._ /** @@ -281,7 +281,6 @@ class HiveSqlAstBuilder(sessionState: HiveSessionState) extends SparkSqlAstBuild name: String, expressions: Seq[Expression], ctx: LateralViewContext): Generator = { - val functionRegistry = sessionState.functionRegistry val func = functionRegistry.lookupFunction(name, expressions) func match { case g: Generator => g diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index c3f09db4870c0..53760ee11e639 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -55,20 +55,20 @@ private[hive] class HiveFunctionRegistry( } } - override def getFunctionBuilderAndInfo( + override def makeFunctionBuilderAndInfo( name: String, functionClassName: String): (ExpressionInfo, FunctionBuilder) = { val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) val hiveUDFClass = hiveUDFWrapper.createFunction().getClass val info = new ExpressionInfo(functionClassName, name) - val builder = genHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) + val builder = makeHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) (info, builder) } /** * Generates a Spark FunctionBuilder for a Hive UDF which is specified by a given classname. */ - def genHiveUDFBuilder( + def makeHiveUDFBuilder( name: String, functionClassName: String, hiveUDFClass: Class[_], @@ -159,9 +159,9 @@ private[hive] class HiveFunctionRegistry( // catch the exception and throw AnalysisException instead. val builder = if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { - genHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) } else { - genHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) } builder(children) } From 314c4db851da5e832fb995f5538e66d4c1ddc89a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Mar 2016 04:00:24 +0000 Subject: [PATCH 09/38] Add resources to CatalogFunction. --- .../spark/sql/catalyst/catalog/interface.scala | 6 +++++- .../catalyst/catalog/CatalogTestCases.scala | 5 +++-- .../catalyst/catalog/SessionCatalogSuite.scala | 4 +++- .../spark/sql/execution/command/ddl.scala | 2 +- .../spark/sql/hive/client/HiveClientImpl.scala | 18 +++++++++++++++--- 5 files changed, 27 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 303846d31332f..f43d9e6d85000 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -171,8 +171,12 @@ abstract class ExternalCatalog { * * @param identifier name of the function * @param className fully qualified class name, e.g. 
"org.apache.spark.util.MyFunc" + * @param resources resource types and Uris used by the function */ -case class CatalogFunction(identifier: FunctionIdentifier, className: String) +case class CatalogFunction( + identifier: FunctionIdentifier, + className: String, + resources: Seq[(String, String)]) /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala index 959bd564d9ff6..9d922f5d3bd33 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala @@ -433,7 +433,8 @@ abstract class CatalogTestCases extends SparkFunSuite with BeforeAndAfterEach { test("get function") { val catalog = newBasicCatalog() assert(catalog.getFunction("db2", "func1") == - CatalogFunction(FunctionIdentifier("func1", Some("db2")), funcClass)) + CatalogFunction(FunctionIdentifier("func1", Some("db2")), funcClass, + Seq.empty[(String, String)])) intercept[AnalysisException] { catalog.getFunction("db2", "does_not_exist") } @@ -557,7 +558,7 @@ abstract class CatalogTestUtils { } def newFunc(name: String, database: Option[String] = None): CatalogFunction = { - CatalogFunction(FunctionIdentifier(name, database), funcClass) + CatalogFunction(FunctionIdentifier(name, database), funcClass, Seq.empty[(String, String)]) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index 9282f86b32cf6..d85e1192f1e19 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -743,7 +743,9 @@ class SessionCatalogSuite extends SparkFunSuite { test("get function") { val catalog = new SessionCatalog(newBasicCatalog()) - val expected = CatalogFunction(FunctionIdentifier("func1", Some("db2")), funcClass) + val expected = + CatalogFunction(FunctionIdentifier("func1", Some("db2")), funcClass, + Seq.empty[(String, String)]) assert(catalog.getFunction(FunctionIdentifier("func1", Some("db2"))) == expected) // Get function without explicitly specifying database catalog.setCurrentDatabase("db2") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index c46bca78342af..edca52aac02b7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -193,7 +193,7 @@ case class CreateFunction( override def run(sqlContext: SQLContext): Seq[Row] = { val func = FunctionIdentifier(functionName, databaseName) - val catalogFunc = CatalogFunction(func, alias) + val catalogFunc = CatalogFunction(func, alias, resources) if (isTemp) { // Set `ignoreIfExists` to false, so if the temporary function already exists, // an exception will be thrown. 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index a31178e3472d6..d35d8b4d21357 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.cli.CliSessionState import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.metastore.{TableType => HiveTableType} -import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, FieldSchema, Function => HiveFunction, FunctionType, PrincipalType, ResourceUri} +import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, FieldSchema, Function => HiveFunction, FunctionType, PrincipalType, ResourceType, ResourceUri} import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.metadata.{Hive, Partition => HivePartition, Table => HiveTable} import org.apache.hadoop.hive.ql.plan.AddPartitionDesc @@ -611,6 +611,9 @@ private[hive] class HiveClientImpl( .asInstanceOf[Class[_ <: org.apache.hadoop.hive.ql.io.HiveOutputFormat[_, _]]] private def toHiveFunction(f: CatalogFunction, db: String): HiveFunction = { + val resourceUris = f.resources.map { resource => + new ResourceUri(ResourceType.valueOf(resource._1), resource._2) + } new HiveFunction( f.identifier.funcName, db, @@ -619,12 +622,21 @@ private[hive] class HiveClientImpl( PrincipalType.USER, (System.currentTimeMillis / 1000).toInt, FunctionType.JAVA, - List.empty[ResourceUri].asJava) + resourceUris.asJava) } private def fromHiveFunction(hf: HiveFunction): CatalogFunction = { val name = FunctionIdentifier(hf.getFunctionName, Option(hf.getDbName)) - new CatalogFunction(name, hf.getClassName) + val resources = hf.getResourceUris.asScala.map { uri => + val resourceType = uri.getResourceType() match { + case ResourceType.ARCHIVE => "archive" + case ResourceType.FILE => "file" + case ResourceType.JAR => "jar" + case r => throw new SparkException(s"Unknown resource type: $r") + } + (resourceType, uri.getUri()) + } + new CatalogFunction(name, hf.getClassName, resources) } private def toHiveColumn(c: CatalogColumn): FieldSchema = { From c370c479b388606226b91e19da791f328014559e Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Mar 2016 09:51:47 +0000 Subject: [PATCH 10/38] Let Spark load Hive permanent function and create expression. --- .../spark/sql/execution/command/ddl.scala | 1 + .../spark/sql/hive/HiveSessionState.scala | 4 +- .../sql/hive/client/HiveClientImpl.scala | 8 ++- .../org/apache/spark/sql/hive/hiveUDFs.scala | 65 ++++++++++++++----- .../apache/spark/sql/hive/test/TestHive.scala | 9 ++- .../sql/hive/execution/SQLQuerySuite.scala | 26 ++++++++ 6 files changed, 88 insertions(+), 25 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index edca52aac02b7..15c4718713830 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -207,6 +207,7 @@ case class CreateFunction( sqlContext.sessionState.catalog.getFunction(func) != null } catch { case _: NoSuchFunctionException => false + case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. 
} if (funcExisting) { val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 86c486e1ed98b..ad8d7a353819e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.internal.{SessionState, SQLConf} /** * A class that holds all session-specific state in a given [[HiveContext]]. */ -private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) { +private[hive] class HiveSessionState(val ctx: HiveContext) extends SessionState(ctx) { override lazy val conf: SQLConf = new SQLConf { override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false) @@ -40,7 +40,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) * Note that HiveUDFs will be overridden by functions registered in this context. */ override lazy val functionRegistry: FunctionRegistry = { - new HiveFunctionRegistry(FunctionRegistry.builtin.copy(), ctx.executionHive) + new HiveFunctionRegistry(FunctionRegistry.builtin.copy(), ctx.executionHive, this) } /** diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index d35d8b4d21357..84d8324731f8b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -21,6 +21,7 @@ import java.io.{File, PrintStream} import scala.collection.JavaConverters._ import scala.language.reflectiveCalls +import scala.util.Try import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -557,7 +558,10 @@ private[hive] class HiveClientImpl( override def getFunctionOption( db: String, name: String): Option[CatalogFunction] = withHiveState { - Option(client.getFunction(db, name)).map(fromHiveFunction) + client.getFunction(db, name) + Try { + Option(client.getFunction(db, name)).map(fromHiveFunction) + }.getOrElse(None) } override def listFunctions(db: String, pattern: String): Seq[String] = withHiveState { @@ -612,7 +616,7 @@ private[hive] class HiveClientImpl( private def toHiveFunction(f: CatalogFunction, db: String): HiveFunction = { val resourceUris = f.resources.map { resource => - new ResourceUri(ResourceType.valueOf(resource._1), resource._2) + new ResourceUri(ResourceType.valueOf(resource._1.toUpperCase), resource._2) } new HiveFunction( f.identifier.funcName, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 53760ee11e639..9e6eb8a839c3b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -32,8 +32,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.Obje import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{analysis, InternalRow} +import org.apache.spark.sql.catalyst.{analysis, FunctionIdentifier, InternalRow} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder +import 
org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback @@ -44,7 +45,8 @@ import org.apache.spark.sql.types._ private[hive] class HiveFunctionRegistry( underlying: analysis.FunctionRegistry, - executionHive: HiveClientImpl) + executionHive: HiveClientImpl, + sessionStage: HiveSessionState) extends analysis.FunctionRegistry with HiveInspectors { def getFunctionInfo(name: String): FunctionInfo = { @@ -55,6 +57,26 @@ private[hive] class HiveFunctionRegistry( } } + def loadHivePermanentFunction(name: String): Option[CatalogFunction] = { + val databaseName = sessionStage.catalog.getCurrentDatabase + val func = FunctionIdentifier(name, Option(databaseName)) + val catalogFunc = + if (sessionStage.catalog.listFunctions(databaseName, name).size != 0) { + Some(sessionStage.catalog.getFunction(func)) + } else { + None + } + catalogFunc.map(_.resources.foreach { resource => + resource._1.toLowerCase match { + case "jar" => sessionStage.ctx.addJar(resource._2) + case _ => + sessionStage.ctx.runSqlHive(s"ADD FILE ${resource._2}") + sessionStage.ctx.sparkContext.addFile(resource._2) + } + }) + catalogFunc + } + override def makeFunctionBuilderAndInfo( name: String, functionClassName: String): (ExpressionInfo, FunctionBuilder) = { @@ -148,22 +170,29 @@ private[hive] class HiveFunctionRegistry( } else { // We only look it up to see if it exists, but do not include it in the HiveUDF since it is // not always serializable. - val functionInfo: FunctionInfo = - Option(getFunctionInfo(name.toLowerCase)).getOrElse( - throw new AnalysisException(s"undefined function $name")) - - val functionClassName = functionInfo.getFunctionClass.getName - - // When we instantiate hive UDF wrapper class, we may throw exception if the input expressions - // don't satisfy the hive UDF, such as type mismatch, input number mismatch, etc. Here we - // catch the exception and throw AnalysisException instead. - val builder = - if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) - } else { - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) - } - builder(children) + val optFunctionInfo = Option(getFunctionInfo(name.toLowerCase)) + if (optFunctionInfo.isEmpty) { + val catalogFunc = loadHivePermanentFunction(name).getOrElse( + throw new AnalysisException(s"undefined function $name")) + + val functionClassName = catalogFunc.className + val (_, builder) = makeFunctionBuilderAndInfo(name, functionClassName) + builder(children) + } else { + val functionInfo = optFunctionInfo.get + val functionClassName = functionInfo.getFunctionClass.getName + + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number mismatch, + // etc. Here we catch the exception and throw AnalysisException instead. 
+ val builder = + if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) + } else { + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) + } + builder(children) + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 4afc8d18a6f8a..3a65154f8d72f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -202,7 +202,8 @@ class TestHiveContext private[hive]( override lazy val functionRegistry = { new TestHiveFunctionRegistry( - org.apache.spark.sql.catalyst.analysis.FunctionRegistry.builtin.copy(), self.executionHive) + org.apache.spark.sql.catalyst.analysis.FunctionRegistry.builtin.copy(), self.executionHive, + this) } } @@ -528,8 +529,10 @@ class TestHiveContext private[hive]( } -private[hive] class TestHiveFunctionRegistry(fr: SimpleFunctionRegistry, client: HiveClientImpl) - extends HiveFunctionRegistry(fr, client) { +private[hive] class TestHiveFunctionRegistry( + fr: SimpleFunctionRegistry, + client: HiveClientImpl, + sessionState: HiveSessionState) extends HiveFunctionRegistry(fr, client, sessionState) { private val removedFunctions = collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))] diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6199253d34db0..c292f3f469ae2 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -24,6 +24,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, FunctionRegistry} +import org.apache.spark.sql.catalyst.parser.ng.ParseException import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation} @@ -83,6 +84,31 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer( sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), Row(3) :: Row(3) :: Nil) + + sql("DROP TEMPORARY FUNCTION udtf_count2") + } + + test("permanent UDTF") { + sql( + s""" + |CREATE FUNCTION udtf_count_temp + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + |USING JAR '${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}' + """.stripMargin) + + checkAnswer( + sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc"), + Row(97, 500) :: Row(97, 500) :: Nil) + + checkAnswer( + sql("SELECT udtf_count_temp(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), + Row(3) :: Row(3) :: Nil) + + sql("DROP FUNCTION udtf_count_temp") + val errMsg = intercept[ParseException] { + sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc") + }.getMessage + assert(errMsg.contains("undefined function udtf_count_temp")) } test("SPARK-6835: udtf in lateral view") { From acf9299a79684dbb063efa035323726c753c682d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 30 Mar 2016 10:05:34 +0000 Subject: [PATCH 11/38] A little 
refactoring. --- .../org/apache/spark/sql/hive/hiveUDFs.scala | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 9e6eb8a839c3b..2700d6fa3a657 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -83,7 +83,7 @@ private[hive] class HiveFunctionRegistry( val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) val hiveUDFClass = hiveUDFWrapper.createFunction().getClass val info = new ExpressionInfo(functionClassName, name) - val builder = makeHiveUDFBuilder(name, functionClassName, hiveUDFClass, null, hiveUDFWrapper) + val builder = makeHiveUDFBuilder(name, functionClassName, hiveUDFClass, hiveUDFWrapper) (info, builder) } @@ -94,19 +94,7 @@ private[hive] class HiveFunctionRegistry( name: String, functionClassName: String, hiveUDFClass: Class[_], - functionInfo: FunctionInfo = null, - wrapper: HiveFunctionWrapper = null): FunctionBuilder = { - val hiveUDFWrapper = - if (wrapper == null) { - if (functionInfo == null) { - new HiveFunctionWrapper(functionClassName) - } else { - new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF) - } - } else { - wrapper - } - + hiveUDFWrapper: HiveFunctionWrapper): FunctionBuilder = { val builder = (children: Seq[Expression]) => { try { if (classOf[GenericUDFMacro].isAssignableFrom(hiveUDFClass)) { @@ -187,9 +175,11 @@ private[hive] class HiveFunctionRegistry( // etc. Here we catch the exception and throw AnalysisException instead. val builder = if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, functionInfo) + val wrapper = new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF) + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) } else { - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass) + val wrapper = new HiveFunctionWrapper(functionClassName) + makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) } builder(children) } From 2cab41cdc254764ed976842475ce2c2547b0ddcc Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 31 Mar 2016 09:32:07 +0000 Subject: [PATCH 12/38] Refactoring. 
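This refactoring moves the function-existence check into SessionCatalog (functionExists), turns CreateFunction/DropFunction into RunnableCommands without the trailing SQL string, and pulls resource loading up into SessionState via a new Resource(resourceType, path) case class plus loadResource/loadResources, which HiveSessionState overrides to also issue ADD FILE through Hive.

A minimal usage sketch of the new resource-loading API follows; the jar/file paths and the surrounding sqlContext are hypothetical and only illustrate how a caller is expected to hand a permanent function's dependencies to the session state before first use:

    import org.apache.spark.sql.internal.Resource

    // Hypothetical dependencies of a permanent UDF; paths are placeholders.
    val resources = Seq(
      Resource("jar", "/tmp/example-udfs.jar"),    // dispatched to ctx.addJar
      Resource("file", "/tmp/lookup-table.txt"))   // dispatched to sparkContext.addFile
    sqlContext.sessionState.loadResources(resources)
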
--- .../catalyst/analysis/FunctionRegistry.scala | 17 ++- .../sql/catalyst/catalog/SessionCatalog.scala | 14 +- .../apache/spark/sql/execution/SparkQl.scala | 4 +- .../spark/sql/execution/SparkSqlParser.scala | 5 +- .../spark/sql/execution/command/ddl.scala | 36 ++--- .../spark/sql/internal/SessionState.scala | 16 ++ .../execution/command/DDLCommandSuite.scala | 12 +- .../HiveThriftServer2Suites.scala | 138 +++++++++--------- .../spark/sql/hive/HiveSessionState.scala | 16 +- .../sql/hive/client/HiveClientImpl.scala | 12 +- .../org/apache/spark/sql/hive/hiveUDFs.scala | 90 ++++++------ .../sql/hive/execution/HiveQuerySuite.scala | 1 - .../sql/hive/execution/SQLQuerySuite.scala | 72 ++++----- 13 files changed, 241 insertions(+), 192 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index a45c46512a94d..724dab57d21ab 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -53,14 +53,15 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean - /* Return the FunctionBuilder and ExpressionInfo for the specified function name and classname. */ - def makeFunctionBuilderAndInfo( - name: String, - functionClassName: String): (ExpressionInfo, FunctionBuilder) = { - val clazz = Utils.getContextOrSparkClassLoader.loadClass(functionClassName) - val (_, (info, builder)) = - FunctionRegistry.expression(name, clazz.asInstanceOf[Class[Expression]]) - (info, builder) + /** + * Construct a [[FunctionBuilder]] based on the provided class that represents a function. + * + * This performs reflection to decide what type of [[Expression]] to return in the builder. + * This is useful for creating temporary functions. + */ + def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { + // TODO: at least support UDAFs here + throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 783dfc60b5ccf..4a9700c72808b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -24,7 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, SimpleFunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException, SimpleFunctionRegistry} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} @@ -458,6 +458,18 @@ class SessionCatalog( externalCatalog.getFunction(db, name.funcName) } + /** + * Check if a function is already existing. 
+ * + */ + def functionExists(name: FunctionIdentifier): Boolean = { + try { + getFunction(name) != null + } catch { + case _: NoSuchFunctionException => false + case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. + } + } // ---------------------------------------------------------------- // | Methods that interact with temporary and metastore functions | diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala index 6fe04757ba2d1..cd008e78bc6b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkQl.scala @@ -218,7 +218,7 @@ private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends Cataly } case _ => parseFailed("Invalid CREATE FUNCTION command", node) } - CreateFunction(dbName, funcName, alias, resources, temp.isDefined)(node.source) + CreateFunction(dbName, funcName, alias, resources, temp.isDefined) // DROP [TEMPORARY] FUNCTION [IF EXISTS] function_name; case Token("TOK_DROPFUNCTION", args) => @@ -248,7 +248,7 @@ private[sql] class SparkQl(conf: ParserConf = SimpleParserConf()) extends Cataly val Seq(ifExists, temp) = getClauses(Seq( "TOK_IFEXISTS", "TOK_TEMPORARY"), otherArgs) - DropFunction(dbName, funcName, ifExists.isDefined, temp.isDefined)(node.source) + DropFunction(dbName, funcName, ifExists.isDefined, temp.isDefined) case Token("TOK_ALTERTABLE", alterTableArgs) => AlterTableCommandParser.parse(node) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 8333074ecaf22..dd09097e6d342 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -301,8 +301,7 @@ class SparkSqlAstBuilder extends AstBuilder { function, string(ctx.className), // TODO this is not an alias. 
resources, - ctx.TEMPORARY != null)( - command(ctx)) + ctx.TEMPORARY != null) } /** @@ -315,7 +314,7 @@ class SparkSqlAstBuilder extends AstBuilder { */ override def visitDropFunction(ctx: DropFunctionContext): LogicalPlan = withOrigin(ctx) { val (database, function) = visitFunctionName(ctx.qualifiedName) - DropFunction(database, function, ctx.EXISTS != null, ctx.TEMPORARY != null)(command(ctx)) + DropFunction(database, function, ctx.EXISTS != null, ctx.TEMPORARY != null) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 15c4718713830..a0a7e42d4a755 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogFunction} import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExpressionInfo} import org.apache.spark.sql.execution.datasources.BucketSpec import org.apache.spark.sql.types._ @@ -188,28 +188,21 @@ case class CreateFunction( functionName: String, alias: String, resources: Seq[(String, String)], - isTemp: Boolean)(sql: String) - extends NativeDDLCommand(sql) with Logging { + isTemp: Boolean) + extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val func = FunctionIdentifier(functionName, databaseName) val catalogFunc = CatalogFunction(func, alias, resources) if (isTemp) { - // Set `ignoreIfExists` to false, so if the temporary function already exists, - // an exception will be thrown. - val (info, builder) = - sqlContext.sessionState.functionRegistry.makeFunctionBuilderAndInfo(functionName, alias) - sqlContext.sessionState.catalog.createTempFunction(functionName, info, builder, false) + val info = new ExpressionInfo(alias, functionName) + val builder = + sqlContext.sessionState.functionRegistry.makeFunctionBuilder(functionName, alias) + sqlContext.sessionState.catalog.createTempFunction( + functionName, info, builder, ignoreIfExists = false) } else { // Check if the function to create is already existing. If so, throw exception. - var funcExisting: Boolean = - try { - sqlContext.sessionState.catalog.getFunction(func) != null - } catch { - case _: NoSuchFunctionException => false - case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. 
- } - if (funcExisting) { + if (sqlContext.sessionState.catalog.functionExists(func)) { val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) throw new AnalysisException( s"Function '$functionName' already exists in database '$dbName'.") @@ -229,8 +222,8 @@ case class DropFunction( databaseName: Option[String], functionName: String, ifExists: Boolean, - isTemp: Boolean)(sql: String) - extends NativeDDLCommand(sql) with Logging { + isTemp: Boolean) + extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { if (isTemp) { @@ -240,11 +233,10 @@ case class DropFunction( } else { val func = FunctionIdentifier(functionName, databaseName) if (!ifExists) { - // getFunction can throw NoSuchFunctionException itself, or return null. - val existingFunc = sqlContext.sessionState.catalog.getFunction(func) - if (existingFunc == null) { + if (!sqlContext.sessionState.catalog.functionExists(func)) { val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) - throw new NoSuchFunctionException(dbName, functionName) + throw new AnalysisException( + s"Function '$functionName' does not exist in database '$dbName'.") } } sqlContext.sessionState.catalog.dropFunction(func) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index f7fdfacd310e8..7d9e016b72149 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -112,4 +112,20 @@ private[sql] class SessionState(ctx: SQLContext) { */ lazy val continuousQueryManager: ContinuousQueryManager = new ContinuousQueryManager(ctx) + /** + * Loads resource to SQLContext. + */ + def loadResource(resource: Resource): Unit = { + resource.resourceType.toLowerCase match { + case "jar" => ctx.addJar(resource.path) + case _ => ctx.sparkContext.addFile(resource.path) + } + } + + /** + * Loads resources such as JARs and Files to SQLContext. 
+ */ + def loadResources(resources: Seq[Resource]): Unit = resources.foreach(loadResource(_)) } + +case class Resource(resourceType: String, path: String) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala index ccbfd41cca22e..ebb64a5d84a25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLCommandSuite.scala @@ -146,13 +146,13 @@ class DDLCommandSuite extends PlanTest { "helloworld", "com.matthewrathbone.example.SimpleUDFExample", Seq(("jar", "/path/to/jar1"), ("jar", "/path/to/jar2")), - isTemp = true)(sql1) + isTemp = true) val expected2 = CreateFunction( Some("hello"), "world", "com.matthewrathbone.example.SimpleUDFExample", Seq(("archive", "/path/to/archive"), ("file", "/path/to/file")), - isTemp = false)(sql2) + isTemp = false) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) } @@ -172,22 +172,22 @@ class DDLCommandSuite extends PlanTest { None, "helloworld", ifExists = false, - isTemp = true)(sql1) + isTemp = true) val expected2 = DropFunction( None, "helloworld", ifExists = true, - isTemp = true)(sql2) + isTemp = true) val expected3 = DropFunction( Some("hello"), "world", ifExists = false, - isTemp = false)(sql3) + isTemp = false) val expected4 = DropFunction( Some("hello"), "world", ifExists = true, - isTemp = false)(sql4) + isTemp = false) comparePlans(parsed1, expected1) comparePlans(parsed2, expected2) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 33af624cfda78..491ea305a917d 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -491,46 +491,50 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { test("SPARK-11595 ADD JAR with input path having URL scheme") { withJdbcStatement { statement => - val jarPath = "../hive/src/test/resources/TestUDTF.jar" - val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" + try { + val jarPath = "../hive/src/test/resources/TestUDTF.jar" + val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" - Seq( - s"ADD JAR $jarURL", - s"""CREATE TEMPORARY FUNCTION udtf_count2 - |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' - """.stripMargin - ).foreach(statement.execute) + Seq( + s"ADD JAR $jarURL", + s"""CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin + ).foreach(statement.execute) - val rs1 = statement.executeQuery("DESCRIBE FUNCTION udtf_count2") + val rs1 = statement.executeQuery("DESCRIBE FUNCTION udtf_count2") - assert(rs1.next()) - assert(rs1.getString(1) === "Function: udtf_count2") + assert(rs1.next()) + assert(rs1.getString(1) === "Function: udtf_count2") - assert(rs1.next()) - assertResult("Class: org.apache.spark.sql.hive.execution.GenericUDTFCount2") { - rs1.getString(1) - } + assert(rs1.next()) + assertResult("Class: org.apache.spark.sql.hive.execution.GenericUDTFCount2") { + rs1.getString(1) + } - assert(rs1.next()) - assert(rs1.getString(1) === "Usage: To be added.") + assert(rs1.next()) + 
assert(rs1.getString(1) === "Usage: To be added.") - val dataPath = "../hive/src/test/resources/data/files/kv1.txt" + val dataPath = "../hive/src/test/resources/data/files/kv1.txt" - Seq( - s"CREATE TABLE test_udtf(key INT, value STRING)", - s"LOAD DATA LOCAL INPATH '$dataPath' OVERWRITE INTO TABLE test_udtf" - ).foreach(statement.execute) + Seq( + s"CREATE TABLE test_udtf(key INT, value STRING)", + s"LOAD DATA LOCAL INPATH '$dataPath' OVERWRITE INTO TABLE test_udtf" + ).foreach(statement.execute) - val rs2 = statement.executeQuery( - "SELECT key, cc FROM test_udtf LATERAL VIEW udtf_count2(value) dd AS cc") + val rs2 = statement.executeQuery( + "SELECT key, cc FROM test_udtf LATERAL VIEW udtf_count2(value) dd AS cc") - assert(rs2.next()) - assert(rs2.getInt(1) === 97) - assert(rs2.getInt(2) === 500) + assert(rs2.next()) + assert(rs2.getInt(1) === 97) + assert(rs2.getInt(2) === 500) - assert(rs2.next()) - assert(rs2.getInt(1) === 97) - assert(rs2.getInt(2) === 500) + assert(rs2.next()) + assert(rs2.getInt(1) === 97) + assert(rs2.getInt(2) === 500) + } finally { + statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2") + } } } @@ -548,43 +552,47 @@ class SingleSessionSuite extends HiveThriftJdbcTest { "--conf spark.sql.hive.thriftServer.singleSession=true" :: Nil test("test single session") { - withMultipleConnectionJdbcStatement( - { statement => - val jarPath = "../hive/src/test/resources/TestUDTF.jar" - val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" - - // Configurations and temporary functions added in this session should be visible to all - // the other sessions. - Seq( - "SET foo=bar", - s"ADD JAR $jarURL", - s"""CREATE TEMPORARY FUNCTION udtf_count2 - |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' - """.stripMargin - ).foreach(statement.execute) - }, - - { statement => - val rs1 = statement.executeQuery("SET foo") - - assert(rs1.next()) - assert(rs1.getString(1) === "foo") - assert(rs1.getString(2) === "bar") - - val rs2 = statement.executeQuery("DESCRIBE FUNCTION udtf_count2") - - assert(rs2.next()) - assert(rs2.getString(1) === "Function: udtf_count2") + try { + withMultipleConnectionJdbcStatement( + { statement => + val jarPath = "../hive/src/test/resources/TestUDTF.jar" + val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" + + // Configurations and temporary functions added in this session should be visible to all + // the other sessions. 
+ Seq( + "SET foo=bar", + s"ADD JAR $jarURL", + s"""CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin + ).foreach(statement.execute) + }, + + { statement => + val rs1 = statement.executeQuery("SET foo") + + assert(rs1.next()) + assert(rs1.getString(1) === "foo") + assert(rs1.getString(2) === "bar") + + val rs2 = statement.executeQuery("DESCRIBE FUNCTION udtf_count2") + + assert(rs2.next()) + assert(rs2.getString(1) === "Function: udtf_count2") + + assert(rs2.next()) + assertResult("Class: org.apache.spark.sql.hive.execution.GenericUDTFCount2") { + rs2.getString(1) + } - assert(rs2.next()) - assertResult("Class: org.apache.spark.sql.hive.execution.GenericUDTFCount2") { - rs2.getString(1) + assert(rs2.next()) + assert(rs2.getString(1) === "Usage: To be added.") } - - assert(rs2.next()) - assert(rs2.getString(1) === "Usage: To be added.") - } - ) + ) + } finally { + statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2") + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index ad8d7a353819e..31ac1b3ad364a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -23,13 +23,13 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.execution.{python, SparkPlanner} import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.hive.execution.HiveSqlParser -import org.apache.spark.sql.internal.{SessionState, SQLConf} +import org.apache.spark.sql.internal.{Resource, SessionState, SQLConf} /** * A class that holds all session-specific state in a given [[HiveContext]]. */ -private[hive] class HiveSessionState(val ctx: HiveContext) extends SessionState(ctx) { +private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) { override lazy val conf: SQLConf = new SQLConf { override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false) @@ -104,4 +104,16 @@ private[hive] class HiveSessionState(val ctx: HiveContext) extends SessionState( } } + /** + * Loads resource to SQLContext. 
+ */ + override def loadResource(resource: Resource): Unit = { + resource.resourceType.toLowerCase match { + case "jar" => + super.loadResource(resource) + case _ => + ctx.runSqlHive(s"ADD FILE ${resource.path}") + super.loadResource(resource) + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 84d8324731f8b..1f66fbfd85ffa 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -38,6 +38,7 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchPartitionException} import org.apache.spark.sql.catalyst.catalog._ @@ -558,10 +559,7 @@ private[hive] class HiveClientImpl( override def getFunctionOption( db: String, name: String): Option[CatalogFunction] = withHiveState { - client.getFunction(db, name) - Try { - Option(client.getFunction(db, name)).map(fromHiveFunction) - }.getOrElse(None) + Option(client.getFunction(db, name)).map(fromHiveFunction) } override def listFunctions(db: String, pattern: String): Seq[String] = withHiveState { @@ -615,8 +613,8 @@ private[hive] class HiveClientImpl( .asInstanceOf[Class[_ <: org.apache.hadoop.hive.ql.io.HiveOutputFormat[_, _]]] private def toHiveFunction(f: CatalogFunction, db: String): HiveFunction = { - val resourceUris = f.resources.map { resource => - new ResourceUri(ResourceType.valueOf(resource._1.toUpperCase), resource._2) + val resourceUris = f.resources.map { case (resourceType, resourcePath) => + new ResourceUri(ResourceType.valueOf(resourceType.toUpperCase), resourcePath) } new HiveFunction( f.identifier.funcName, @@ -636,7 +634,7 @@ private[hive] class HiveClientImpl( case ResourceType.ARCHIVE => "archive" case ResourceType.FILE => "file" case ResourceType.JAR => "jar" - case r => throw new SparkException(s"Unknown resource type: $r") + case r => throw new AnalysisException(s"Unknown resource type: $r") } (resourceType, uri.getUri()) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index 2700d6fa3a657..ec85350e45937 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -40,13 +40,14 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.hive.client.HiveClientImpl +import org.apache.spark.sql.internal.Resource import org.apache.spark.sql.types._ private[hive] class HiveFunctionRegistry( underlying: analysis.FunctionRegistry, executionHive: HiveClientImpl, - sessionStage: HiveSessionState) + sessionState: HiveSessionState) extends analysis.FunctionRegistry with HiveInspectors { def getFunctionInfo(name: String): FunctionInfo = { @@ -57,40 +58,44 @@ private[hive] class HiveFunctionRegistry( } } - def loadHivePermanentFunction(name: String): Option[CatalogFunction] = { - val databaseName = sessionStage.catalog.getCurrentDatabase + /** + * Returns CatalogFunction 
for the given function name. + * + * This function looks up the given function name in SessionCatalog. For Hive, it turns to + * look up the function in Hive permanent functions in current database. These permanent + * functions are registered in Hive with `Create Function` DDL command. + * + * It is necessary to load related resources (JARs, Files) before calling the function. + * We will load these resources here. + */ + private def getHivePermanentFunction(name: String): Option[CatalogFunction] = { + val databaseName = sessionState.catalog.getCurrentDatabase val func = FunctionIdentifier(name, Option(databaseName)) val catalogFunc = - if (sessionStage.catalog.listFunctions(databaseName, name).size != 0) { - Some(sessionStage.catalog.getFunction(func)) + if (sessionState.catalog.functionExists(func)) { + Some(sessionState.catalog.getFunction(func)) } else { None } - catalogFunc.map(_.resources.foreach { resource => - resource._1.toLowerCase match { - case "jar" => sessionStage.ctx.addJar(resource._2) - case _ => - sessionStage.ctx.runSqlHive(s"ADD FILE ${resource._2}") - sessionStage.ctx.sparkContext.addFile(resource._2) - } - }) + val resourcesToLoad = catalogFunc.map(_.resources.map { case (rType, rPath) => + Resource(rType, rPath) + }).getOrElse(Seq.empty[Resource]) + sessionState.loadResources(resourcesToLoad) catalogFunc } - override def makeFunctionBuilderAndInfo( + override def makeFunctionBuilder( name: String, - functionClassName: String): (ExpressionInfo, FunctionBuilder) = { + functionClassName: String): FunctionBuilder = { val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) val hiveUDFClass = hiveUDFWrapper.createFunction().getClass - val info = new ExpressionInfo(functionClassName, name) - val builder = makeHiveUDFBuilder(name, functionClassName, hiveUDFClass, hiveUDFWrapper) - (info, builder) + makeFunctionBuilder(name, functionClassName, hiveUDFClass, hiveUDFWrapper) } /** - * Generates a Spark FunctionBuilder for a Hive UDF which is specified by a given classname. + * Generates a FunctionBuilder for a Hive UDF which is specified by a given classname. */ - def makeHiveUDFBuilder( + private def makeFunctionBuilder( name: String, functionClassName: String, hiveUDFClass: Class[_], @@ -151,38 +156,41 @@ private[hive] class HiveFunctionRegistry( builder } + /** + * Returns the Expression for the function name and given children Expressions. + * This function will first look up existing FunctionBuilder in underlying FunctionRegistry. + * If not found, it will get function info from Hive Function Registry. + * + * If not found again, it will try to find this function in Hive permanent functions. + * If found, we load necessary JARs, Files and create the Expression. + */ override def lookupFunction(name: String, children: Seq[Expression]): Expression = { - val builder = underlying.lookupFunctionBuilder(name) - if (builder.isDefined) { - builder.get(children) - } else { + underlying.lookupFunctionBuilder(name).map { f => f(children) }.getOrElse { // We only look it up to see if it exists, but do not include it in the HiveUDF since it is // not always serializable. 
- val optFunctionInfo = Option(getFunctionInfo(name.toLowerCase)) - if (optFunctionInfo.isEmpty) { - val catalogFunc = loadHivePermanentFunction(name).getOrElse( + val functionInfo = getFunctionInfo(name.toLowerCase) + val builder: FunctionBuilder = + if (functionInfo == null) { + val catalogFunc = getHivePermanentFunction(name).getOrElse( throw new AnalysisException(s"undefined function $name")) - val functionClassName = catalogFunc.className - val (_, builder) = makeFunctionBuilderAndInfo(name, functionClassName) - builder(children) - } else { - val functionInfo = optFunctionInfo.get - val functionClassName = functionInfo.getFunctionClass.getName + val functionClassName = catalogFunc.className + makeFunctionBuilder(name, functionClassName) + } else { + val functionClassName = functionInfo.getFunctionClass.getName - // When we instantiate hive UDF wrapper class, we may throw exception if the input - // expressions don't satisfy the hive UDF, such as type mismatch, input number mismatch, - // etc. Here we catch the exception and throw AnalysisException instead. - val builder = + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number mismatch, + // etc. Here we catch the exception and throw AnalysisException instead. if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { val wrapper = new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF) - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) + makeFunctionBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) } else { val wrapper = new HiveFunctionWrapper(functionClassName) - makeHiveUDFBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) + makeFunctionBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) } - builder(children) - } + } + builder(children) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 7bbbcddb81d3d..01e8dd7b092aa 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -55,7 +55,6 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) // Add Locale setting Locale.setDefault(Locale.US) - sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2") } override def afterAll() { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index c292f3f469ae2..36ed17f5b1f17 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -68,47 +68,51 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import hiveContext.implicits._ test("UDTF") { - sql(s"ADD JAR ${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}") - // The function source code can be found at: - // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF - sql( - """ - |CREATE TEMPORARY FUNCTION udtf_count2 - |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' - """.stripMargin) - - checkAnswer( - 
sql("SELECT key, cc FROM src LATERAL VIEW udtf_count2(value) dd AS cc"), - Row(97, 500) :: Row(97, 500) :: Nil) + try { + sql(s"ADD JAR ${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}") + // The function source code can be found at: + // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF + sql( + """ + |CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin) - checkAnswer( - sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), - Row(3) :: Row(3) :: Nil) + checkAnswer( + sql("SELECT key, cc FROM src LATERAL VIEW udtf_count2(value) dd AS cc"), + Row(97, 500) :: Row(97, 500) :: Nil) - sql("DROP TEMPORARY FUNCTION udtf_count2") + checkAnswer( + sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), + Row(3) :: Row(3) :: Nil) + } finally { + sql("DROP TEMPORARY FUNCTION udtf_count2") + } } test("permanent UDTF") { - sql( - s""" - |CREATE FUNCTION udtf_count_temp - |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' - |USING JAR '${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}' - """.stripMargin) + try { + sql( + s""" + |CREATE FUNCTION udtf_count_temp + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + |USING JAR '${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}' + """.stripMargin) - checkAnswer( - sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc"), - Row(97, 500) :: Row(97, 500) :: Nil) + checkAnswer( + sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc"), + Row(97, 500) :: Row(97, 500) :: Nil) - checkAnswer( - sql("SELECT udtf_count_temp(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), - Row(3) :: Row(3) :: Nil) - - sql("DROP FUNCTION udtf_count_temp") - val errMsg = intercept[ParseException] { - sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc") - }.getMessage - assert(errMsg.contains("undefined function udtf_count_temp")) + checkAnswer( + sql("SELECT udtf_count_temp(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), + Row(3) :: Row(3) :: Nil) + } finally { + sql("DROP FUNCTION udtf_count_temp") + val errMsg = intercept[ParseException] { + sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc") + }.getMessage + assert(errMsg.contains("undefined function udtf_count_temp")) + } } test("SPARK-6835: udtf in lateral view") { From 65d9dbdfbb26c0a1e917767aae08f73df4f9763c Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Thu, 31 Mar 2016 10:22:31 +0000 Subject: [PATCH 13/38] Fix compilation error. 
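The previous commit wrapped the single-session ThriftServer test in an outer try/finally that referenced `statement`, but `statement` is only bound inside each closure passed to withMultipleConnectionJdbcStatement, so the suite no longer compiled. The change below moves the cleanup into the second closure. A reduced sketch of the corrected shape (assertions elided):

    withMultipleConnectionJdbcStatement(
      { statement =>
        // first connection: SET foo=bar, ADD JAR, CREATE TEMPORARY FUNCTION udtf_count2
      },
      { statement =>
        try {
          // second connection: verify SET foo and DESCRIBE FUNCTION udtf_count2
        } finally {
          statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2")
        }
      })
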
--- .../HiveThriftServer2Suites.scala | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index 491ea305a917d..f15ad4ed720c3 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -552,24 +552,24 @@ class SingleSessionSuite extends HiveThriftJdbcTest { "--conf spark.sql.hive.thriftServer.singleSession=true" :: Nil test("test single session") { - try { - withMultipleConnectionJdbcStatement( - { statement => - val jarPath = "../hive/src/test/resources/TestUDTF.jar" - val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" - - // Configurations and temporary functions added in this session should be visible to all - // the other sessions. - Seq( - "SET foo=bar", - s"ADD JAR $jarURL", - s"""CREATE TEMPORARY FUNCTION udtf_count2 - |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' - """.stripMargin - ).foreach(statement.execute) - }, - - { statement => + withMultipleConnectionJdbcStatement( + { statement => + val jarPath = "../hive/src/test/resources/TestUDTF.jar" + val jarURL = s"file://${System.getProperty("user.dir")}/$jarPath" + + // Configurations and temporary functions added in this session should be visible to all + // the other sessions. + Seq( + "SET foo=bar", + s"ADD JAR $jarURL", + s"""CREATE TEMPORARY FUNCTION udtf_count2 + |AS 'org.apache.spark.sql.hive.execution.GenericUDTFCount2' + """.stripMargin + ).foreach(statement.execute) + }, + + { statement => + try { val rs1 = statement.executeQuery("SET foo") assert(rs1.next()) @@ -588,11 +588,11 @@ class SingleSessionSuite extends HiveThriftJdbcTest { assert(rs2.next()) assert(rs2.getString(1) === "Usage: To be added.") + } finally { + statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2") } - ) - } finally { - statement.executeQuery("DROP TEMPORARY FUNCTION udtf_count2") - } + } + ) } } From 67df04f70d2f6edb9a670aaa6cf749c8c86f6695 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 31 Mar 2016 19:35:06 -0700 Subject: [PATCH 14/38] Rename alias to className --- .../org/apache/spark/sql/execution/command/ddl.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 6e6f17e5db76e..9153bf6097634 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -186,18 +186,18 @@ case class DescribeDatabase( case class CreateFunction( databaseName: Option[String], functionName: String, - alias: String, + className: String, resources: Seq[(String, String)], isTemp: Boolean) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val func = FunctionIdentifier(functionName, databaseName) - val catalogFunc = CatalogFunction(func, alias, resources) + val catalogFunc = CatalogFunction(func, className, resources) if (isTemp) { - val info = new ExpressionInfo(alias, functionName) + val info = new ExpressionInfo(className, functionName) val builder = - 
sqlContext.sessionState.functionRegistry.makeFunctionBuilder(functionName, alias) + sqlContext.sessionState.functionRegistry.makeFunctionBuilder(functionName, className) sqlContext.sessionState.catalog.createTempFunction( functionName, info, builder, ignoreIfExists = false) } else { From b67c4448b23aff65d6d153c9f4fdcf1bc6a26712 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 31 Mar 2016 20:26:37 -0700 Subject: [PATCH 15/38] Make SessionCatalog handle makeFunctionBuilder and remove HiveFunctionRegistry --- .../catalyst/analysis/FunctionRegistry.scala | 11 - .../sql/catalyst/catalog/SessionCatalog.scala | 14 ++ .../spark/sql/execution/command/ddl.scala | 2 +- .../spark/sql/hive/HiveSessionCatalog.scala | 60 ++++++ .../spark/sql/hive/HiveSessionState.scala | 8 - .../org/apache/spark/sql/hive/hiveUDFs.scala | 192 ------------------ .../apache/spark/sql/hive/test/TestHive.scala | 19 +- .../sql/hive/test/TestHiveSingleton.scala | 8 +- 8 files changed, 86 insertions(+), 228 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 724dab57d21ab..d4300c9479e8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -52,17 +52,6 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean - - /** - * Construct a [[FunctionBuilder]] based on the provided class that represents a function. - * - * This performs reflection to decide what type of [[Expression]] to return in the builder. - * This is useful for creating temporary functions. - */ - def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { - // TODO: at least support UDAFs here - throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") - } } class SimpleFunctionRegistry extends FunctionRegistry { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 4a9700c72808b..875071cffcb1d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -537,6 +537,8 @@ class SessionCatalog( * Note: This is currently only used for temporary functions. */ def lookupFunction(name: String, children: Seq[Expression]): Expression = { + // TODO: if the function is not in function registry. It needs to load the function from + // the external catalog and loads resources. functionRegistry.lookupFunction(name, children) } @@ -557,7 +559,19 @@ class SessionCatalog( * Return a temporary function. For testing only. */ private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = { + // TODO: Why do we need this? functionRegistry.lookupFunctionBuilder(name) } + /** + * Construct a [[FunctionBuilder]] based on the provided class that represents a function. + * + * This performs reflection to decide what type of [[Expression]] to return in the builder. + * This is useful for creating temporary functions. 
+ */ + def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { + // TODO: at least support UDAFs here + throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") + } + } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 9153bf6097634..85d8a35b5210a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -197,7 +197,7 @@ case class CreateFunction( if (isTemp) { val info = new ExpressionInfo(className, functionName) val builder = - sqlContext.sessionState.functionRegistry.makeFunctionBuilder(functionName, className) + sqlContext.sessionState.catalog.makeFunctionBuilder(functionName, className) sqlContext.sessionState.catalog.createTempFunction( functionName, info, builder, ignoreIfExists = false) } else { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 1cd783e63a252..119f68d495a9f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -17,18 +17,27 @@ package org.apache.spark.sql.hive +import scala.util.control.NonFatal + import org.apache.hadoop.fs.Path +import org.apache.hadoop.hive.ql.exec.{UDAF, UDF} +import org.apache.hadoop.hive.ql.udf.generic.{GenericUDTF, AbstractGenericUDAFResolver, GenericUDF} import org.apache.hadoop.hive.conf.HiveConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.catalog.SessionCatalog +import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.BucketSpec +import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils class HiveSessionCatalog( @@ -111,4 +120,55 @@ class HiveSessionCatalog( metastoreCatalog.cachedDataSourceTables.getIfPresent(key) } + override def makeFunctionBuilder(funcName: String, funcClassName: String): FunctionBuilder = { + makeFunctionBuilder(funcName, Utils.classForName(funcClassName)) + } + + /** + * Construct a [[FunctionBuilder]] based on the provided class that represents a function. + */ + private def makeFunctionBuilder(name: String, clazz: Class[_]): FunctionBuilder = { + // When we instantiate hive UDF wrapper class, we may throw exception if the input + // expressions don't satisfy the hive UDF, such as type mismatch, input number + // mismatch, etc. Here we catch the exception and throw AnalysisException instead. + (children: Seq[Expression]) => { + try { + if (classOf[UDF].isAssignableFrom(clazz)) { + val udf = HiveSimpleUDF(name, new HiveFunctionWrapper(clazz.getName), children) + udf.dataType // Force it to check input data types. 
+ udf + } else if (classOf[GenericUDF].isAssignableFrom(clazz)) { + val udf = HiveGenericUDF(name, new HiveFunctionWrapper(clazz.getName), children) + udf.dataType // Force it to check input data types. + udf + } else if (classOf[AbstractGenericUDAFResolver].isAssignableFrom(clazz)) { + val udaf = HiveUDAFFunction(name, new HiveFunctionWrapper(clazz.getName), children) + udaf.dataType // Force it to check input data types. + udaf + } else if (classOf[UDAF].isAssignableFrom(clazz)) { + val udaf = HiveUDAFFunction( + name, + new HiveFunctionWrapper(clazz.getName), + children, + isUDAFBridgeRequired = true) + udaf.dataType // Force it to check input data types. + udaf + } else if (classOf[GenericUDTF].isAssignableFrom(clazz)) { + val udtf = HiveGenericUDTF(name, new HiveFunctionWrapper(clazz.getName), children) + udtf.elementTypes // Force it to check input data types. + udtf + } else { + throw new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}'") + } + } catch { + case ae: AnalysisException => + throw ae + case NonFatal(e) => + val analysisException = + new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}': $e") + analysisException.setStackTrace(e.getStackTrace) + throw analysisException + } + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 31ac1b3ad364a..426410af69a4e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -35,14 +35,6 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false) } - /** - * Internal catalog for managing functions registered by the user. - * Note that HiveUDFs will be overridden by functions registered in this context. - */ - override lazy val functionRegistry: FunctionRegistry = { - new HiveFunctionRegistry(FunctionRegistry.builtin.copy(), ctx.executionHive, this) - } - /** * Internal catalog for managing table and database states. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index ec85350e45937..fda52f1f69625 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -43,198 +43,6 @@ import org.apache.spark.sql.hive.client.HiveClientImpl import org.apache.spark.sql.internal.Resource import org.apache.spark.sql.types._ - -private[hive] class HiveFunctionRegistry( - underlying: analysis.FunctionRegistry, - executionHive: HiveClientImpl, - sessionState: HiveSessionState) - extends analysis.FunctionRegistry with HiveInspectors { - - def getFunctionInfo(name: String): FunctionInfo = { - // Hive Registry need current database to lookup function - // TODO: the current database of executionHive should be consistent with metadataHive - executionHive.withHiveState { - FunctionRegistry.getFunctionInfo(name) - } - } - - /** - * Returns CatalogFunction for the given function name. - * - * This function looks up the given function name in SessionCatalog. For Hive, it turns to - * look up the function in Hive permanent functions in current database. These permanent - * functions are registered in Hive with `Create Function` DDL command. 
- * - * It is necessary to load related resources (JARs, Files) before calling the function. - * We will load these resources here. - */ - private def getHivePermanentFunction(name: String): Option[CatalogFunction] = { - val databaseName = sessionState.catalog.getCurrentDatabase - val func = FunctionIdentifier(name, Option(databaseName)) - val catalogFunc = - if (sessionState.catalog.functionExists(func)) { - Some(sessionState.catalog.getFunction(func)) - } else { - None - } - val resourcesToLoad = catalogFunc.map(_.resources.map { case (rType, rPath) => - Resource(rType, rPath) - }).getOrElse(Seq.empty[Resource]) - sessionState.loadResources(resourcesToLoad) - catalogFunc - } - - override def makeFunctionBuilder( - name: String, - functionClassName: String): FunctionBuilder = { - val hiveUDFWrapper = new HiveFunctionWrapper(functionClassName) - val hiveUDFClass = hiveUDFWrapper.createFunction().getClass - makeFunctionBuilder(name, functionClassName, hiveUDFClass, hiveUDFWrapper) - } - - /** - * Generates a FunctionBuilder for a Hive UDF which is specified by a given classname. - */ - private def makeFunctionBuilder( - name: String, - functionClassName: String, - hiveUDFClass: Class[_], - hiveUDFWrapper: HiveFunctionWrapper): FunctionBuilder = { - val builder = (children: Seq[Expression]) => { - try { - if (classOf[GenericUDFMacro].isAssignableFrom(hiveUDFClass)) { - val udf = HiveGenericUDF( - name, hiveUDFWrapper, children) - if (udf.resolved) { - udf.dataType // Force it to check input data types. - } - udf - } else if (classOf[UDF].isAssignableFrom(hiveUDFClass)) { - val udf = HiveSimpleUDF(name, hiveUDFWrapper, children) - if (udf.resolved) { - udf.dataType // Force it to check input data types. - } - udf - } else if (classOf[GenericUDF].isAssignableFrom(hiveUDFClass)) { - val udf = HiveGenericUDF(name, hiveUDFWrapper, children) - if (udf.resolved) { - udf.dataType // Force it to check input data types. - } - udf - } else if ( - classOf[AbstractGenericUDAFResolver].isAssignableFrom(hiveUDFClass)) { - val udaf = HiveUDAFFunction(name, hiveUDFWrapper, children) - if (udaf.resolved) { - udaf.dataType // Force it to check input data types. - } - udaf - } else if (classOf[UDAF].isAssignableFrom(hiveUDFClass)) { - val udaf = HiveUDAFFunction( - name, hiveUDFWrapper, children, isUDAFBridgeRequired = true) - if (udaf.resolved) { - udaf.dataType // Force it to check input data types. - } - udaf - } else if (classOf[GenericUDTF].isAssignableFrom(hiveUDFClass)) { - val udtf = HiveGenericUDTF(name, hiveUDFWrapper, children) - if (udtf.resolved) { - udtf.elementTypes // Force it to check input data types. - } - udtf - } else { - throw new AnalysisException(s"No handler for udf ${hiveUDFClass}") - } - } catch { - case analysisException: AnalysisException => - throw analysisException - case throwable: Throwable => - val errorMessage = s"No handler for Hive udf ${hiveUDFClass} " + - s"because: ${throwable.getMessage}." - throw new AnalysisException(errorMessage) - } - } - builder - } - - /** - * Returns the Expression for the function name and given children Expressions. - * This function will first look up existing FunctionBuilder in underlying FunctionRegistry. - * If not found, it will get function info from Hive Function Registry. - * - * If not found again, it will try to find this function in Hive permanent functions. - * If found, we load necessary JARs, Files and create the Expression. 
- */ - override def lookupFunction(name: String, children: Seq[Expression]): Expression = { - underlying.lookupFunctionBuilder(name).map { f => f(children) }.getOrElse { - // We only look it up to see if it exists, but do not include it in the HiveUDF since it is - // not always serializable. - val functionInfo = getFunctionInfo(name.toLowerCase) - val builder: FunctionBuilder = - if (functionInfo == null) { - val catalogFunc = getHivePermanentFunction(name).getOrElse( - throw new AnalysisException(s"undefined function $name")) - - val functionClassName = catalogFunc.className - makeFunctionBuilder(name, functionClassName) - } else { - val functionClassName = functionInfo.getFunctionClass.getName - - // When we instantiate hive UDF wrapper class, we may throw exception if the input - // expressions don't satisfy the hive UDF, such as type mismatch, input number mismatch, - // etc. Here we catch the exception and throw AnalysisException instead. - if (classOf[GenericUDFMacro].isAssignableFrom(functionInfo.getFunctionClass)) { - val wrapper = new HiveFunctionWrapper(functionClassName, functionInfo.getGenericUDF) - makeFunctionBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) - } else { - val wrapper = new HiveFunctionWrapper(functionClassName) - makeFunctionBuilder(name, functionClassName, functionInfo.getFunctionClass, wrapper) - } - } - builder(children) - } - } - - override def registerFunction(name: String, info: ExpressionInfo, builder: FunctionBuilder) - : Unit = underlying.registerFunction(name, info, builder) - - /* List all of the registered function names. */ - override def listFunction(): Seq[String] = { - (FunctionRegistry.getFunctionNames.asScala ++ underlying.listFunction()).toList.sorted - } - - /* Get the class of the registered function by specified name. */ - override def lookupFunction(name: String): Option[ExpressionInfo] = { - underlying.lookupFunction(name).orElse( - Try { - val info = getFunctionInfo(name) - val annotation = info.getFunctionClass.getAnnotation(classOf[Description]) - if (annotation != null) { - Some(new ExpressionInfo( - info.getFunctionClass.getCanonicalName, - annotation.name(), - annotation.value(), - annotation.extended())) - } else { - Some(new ExpressionInfo( - info.getFunctionClass.getCanonicalName, - name, - null, - null)) - } - }.getOrElse(None)) - } - - override def lookupFunctionBuilder(name: String): Option[FunctionBuilder] = { - underlying.lookupFunctionBuilder(name) - } - - // Note: This does not drop functions stored in the metastore - override def dropFunction(name: String): Boolean = { - underlying.dropFunction(name) - } - -} - private[hive] case class HiveSimpleUDF( name: String, funcWrapper: HiveFunctionWrapper, children: Seq[Expression]) extends Expression with HiveInspectors with CodegenFallback with Logging { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 3a65154f8d72f..b820c36c343d2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -201,9 +201,13 @@ class TestHiveContext private[hive]( } override lazy val functionRegistry = { - new TestHiveFunctionRegistry( - org.apache.spark.sql.catalyst.analysis.FunctionRegistry.builtin.copy(), self.executionHive, - this) + // TestHiveFunctionRegistry tracks removed functions. 
So, we cannot simply use + // org.apache.spark.sql.catalyst.analysis.FunctionRegistry.builtIn.copy. + val fr = new TestHiveFunctionRegistry + org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach { + case (name, (info, builder)) => fr.registerFunction(name, info, builder) + } + fr } } @@ -529,21 +533,18 @@ class TestHiveContext private[hive]( } -private[hive] class TestHiveFunctionRegistry( - fr: SimpleFunctionRegistry, - client: HiveClientImpl, - sessionState: HiveSessionState) extends HiveFunctionRegistry(fr, client, sessionState) { +private[hive] class TestHiveFunctionRegistry extends SimpleFunctionRegistry { private val removedFunctions = collection.mutable.ArrayBuffer.empty[(String, (ExpressionInfo, FunctionBuilder))] def unregisterFunction(name: String): Unit = { - fr.functionBuilders.remove(name).foreach(f => removedFunctions += name -> f) + functionBuilders.remove(name).foreach(f => removedFunctions += name -> f) } def restore(): Unit = { removedFunctions.foreach { - case (name, (info, builder)) => fr.registerFunction(name, info, builder) + case (name, (info, builder)) => registerFunction(name, info, builder) } } } diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala index 368b901dac3dd..5b3f84de61c05 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala @@ -21,18 +21,12 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.sql.SQLContext -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry -import org.apache.spark.sql.hive.{HiveFunctionRegistry, HiveSessionState} -import org.apache.spark.sql.hive.execution.HiveSqlParser trait TestHiveSingleton extends SparkFunSuite with BeforeAndAfterAll { protected val sqlContext: SQLContext = TestHive protected val hiveContext: TestHiveContext = TestHive - private val functionRegistry = - new HiveFunctionRegistry( - FunctionRegistry.builtin.copy(), hiveContext.executionHive, new HiveSessionState(hiveContext)) - protected val hiveSqlParser = new HiveSqlParser(functionRegistry) + protected val hiveSqlParser = hiveContext.sessionState.sqlParser protected override def afterAll(): Unit = { From 1753cac2a09da47fd5ac439656d0f49f8866f841 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 31 Mar 2016 20:27:28 -0700 Subject: [PATCH 16/38] functionClassName => className --- .../scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 119f68d495a9f..83dfe1e728a79 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -120,8 +120,8 @@ class HiveSessionCatalog( metastoreCatalog.cachedDataSourceTables.getIfPresent(key) } - override def makeFunctionBuilder(funcName: String, funcClassName: String): FunctionBuilder = { - makeFunctionBuilder(funcName, Utils.classForName(funcClassName)) + override def makeFunctionBuilder(funcName: String, className: String): FunctionBuilder = { + makeFunctionBuilder(funcName, Utils.classForName(className)) } /** From a2d588ffed2dda19f48270d9b42999ea0585ceb5 Mon Sep 17 00:00:00 
2001 From: Yin Huai Date: Thu, 31 Mar 2016 21:21:59 -0700 Subject: [PATCH 17/38] Put loadFunctionResources to SessionCatalog --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/catalog/SessionCatalog.scala | 21 ++++++-- .../catalyst/catalog/functionResources.scala | 52 +++++++++++++++++++ .../org/apache/spark/sql/SQLContext.scala | 17 +++++- .../spark/sql/execution/SparkSqlParser.scala | 1 + .../spark/sql/execution/command/ddl.scala | 2 +- .../spark/sql/internal/SessionState.scala | 23 +++----- .../spark/sql/hive/HiveSessionCatalog.scala | 5 +- .../spark/sql/hive/HiveSessionState.scala | 23 +++----- .../org/apache/spark/sql/hive/hiveUDFs.scala | 1 - 10 files changed, 106 insertions(+), 41 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8dc0532b3f89a..02747144ad6f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -24,7 +24,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, ScalaReflection, SimpleCatalystConf} -import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{DummyFunctionResourceLoader, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 875071cffcb1d..f6f48aff62c67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -39,17 +39,21 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} */ class SessionCatalog( externalCatalog: ExternalCatalog, + functionResourceLoader: FunctionResourceLoader, functionRegistry: FunctionRegistry, conf: CatalystConf) { import ExternalCatalog._ - def this(externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry) { - this(externalCatalog, functionRegistry, new SimpleCatalystConf(true)) + def this( + externalCatalog: ExternalCatalog, + functionRegistry: FunctionRegistry, + conf: CatalystConf) { + this(externalCatalog, new DummyFunctionResourceLoader, functionRegistry, conf) } // For testing only. def this(externalCatalog: ExternalCatalog) { - this(externalCatalog, new SimpleFunctionRegistry) + this(externalCatalog, new SimpleFunctionRegistry, new SimpleCatalystConf(true)) } protected[this] val tempTables = new mutable.HashMap[String, LogicalPlan] @@ -574,4 +578,15 @@ class SessionCatalog( throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") } + /** + * Loads resources such as JARs and Files to SQLContext. 
+ */ + def loadFunctionResources(resources: Seq[(String, String)]): Unit = { + resources.map(r => (r._1.toLowerCase, r._2)).foreach { + case (resourceType, uri) => + val functionResource = + FunctionResource(FunctionResourceType.fromString(resourceType), uri) + functionResourceLoader.loadResource(functionResource) + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala new file mode 100644 index 0000000000000..4e3400584d0d1 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.sql.AnalysisException + +sealed trait FunctionResourceType + +object JarResource extends FunctionResourceType + +object FileResource extends FunctionResourceType + +object ArchiveResource extends FunctionResourceType + +object FunctionResourceType { + def fromString(resourceType: String): FunctionResourceType = { + resourceType.toLowerCase match { + case "jar" => JarResource + case "file" => FileResource + case "archive" => ArchiveResource + case other => + throw new AnalysisException(s"Resource Type '$resourceType' is not supported.") + } + } +} + +case class FunctionResource(resourceType: FunctionResourceType, uri: String) + +trait FunctionResourceLoader { + def loadResource(resource: FunctionResource): Unit +} + +class DummyFunctionResourceLoader extends FunctionResourceLoader { + override def loadResource(resource: FunctionResource): Unit = { + throw new UnsupportedOperationException + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0576a1a178ec0..ff4302dd3bb14 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -32,7 +32,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.catalog.{ExternalCatalog, InMemoryCatalog} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.encoderFor import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} @@ -208,6 +208,21 @@ class SQLContext private[sql]( sparkContext.addJar(path) } + protected[sql] def functionResourceLoader: FunctionResourceLoader = { + new FunctionResourceLoader { + override def 
loadResource(resource: FunctionResource): Unit = { + resource.resourceType match { + case JarResource => addJar(resource.uri) + case FileResource => sparkContext.addFile(resource.uri) + case ArchiveResource => + throw new AnalysisException( + "Archive is not allowed to be loaded. If YARN mode is used, " + + "please use --archives options while calling spark-submit.") + } + } + } + } + /** * :: Experimental :: * A collection of methods that are considered experimental, but can be used to hook into diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 65a7ff6c3c43e..8de7c2f47e96e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -290,6 +290,7 @@ class SparkSqlAstBuilder extends AstBuilder { case "jar" | "file" | "archive" => resourceType -> string(resource.STRING) case other => + // We keep this case because the error message is better than ResourceType.fromString throw new ParseException(s"Resource Type '$resourceType' is not supported.", ctx) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 85d8a35b5210a..0a842adaea733 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -21,7 +21,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row, SQLContext} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException -import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogFunction} +import org.apache.spark.sql.catalyst.catalog.{FunctionResource, CatalogDatabase, CatalogFunction} import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExpressionInfo} import org.apache.spark.sql.execution.datasources.BucketSpec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 7d9e016b72149..8f663d9674b81 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -51,7 +51,12 @@ private[sql] class SessionState(ctx: SQLContext) { /** * Internal catalog for managing table and database states. */ - lazy val catalog = new SessionCatalog(ctx.externalCatalog, functionRegistry, conf) + lazy val catalog = + new SessionCatalog( + ctx.externalCatalog, + ctx.functionResourceLoader, + functionRegistry, + conf) /** * Interface exposed to the user for registering user-defined functions. @@ -111,21 +116,5 @@ private[sql] class SessionState(ctx: SQLContext) { * Interface to start and stop [[org.apache.spark.sql.ContinuousQuery]]s. */ lazy val continuousQueryManager: ContinuousQueryManager = new ContinuousQueryManager(ctx) - - /** - * Loads resource to SQLContext. 
- */ - def loadResource(resource: Resource): Unit = { - resource.resourceType.toLowerCase match { - case "jar" => ctx.addJar(resource.path) - case _ => ctx.sparkContext.addFile(resource.path) - } - } - - /** - * Loads resources such as JARs and Files to SQLContext. - */ - def loadResources(resources: Seq[Resource]): Unit = resources.foreach(loadResource(_)) } -case class Resource(resourceType: String, path: String) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 83dfe1e728a79..1797f5a767d5f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder -import org.apache.spark.sql.catalyst.catalog.SessionCatalog +import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.rules.Rule @@ -44,9 +44,10 @@ class HiveSessionCatalog( externalCatalog: HiveExternalCatalog, client: HiveClient, context: HiveContext, + functionResourceLoader: FunctionResourceLoader, functionRegistry: FunctionRegistry, conf: SQLConf) - extends SessionCatalog(externalCatalog, functionRegistry, conf) { + extends SessionCatalog(externalCatalog, functionResourceLoader, functionRegistry, conf) { override def setCurrentDatabase(db: String): Unit = { super.setCurrentDatabase(db) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 426410af69a4e..36ebde3afc29d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.parser.ParserInterface import org.apache.spark.sql.execution.{python, SparkPlanner} import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.hive.execution.HiveSqlParser -import org.apache.spark.sql.internal.{Resource, SessionState, SQLConf} +import org.apache.spark.sql.internal.{SessionState, SQLConf} /** @@ -39,7 +39,13 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) * Internal catalog for managing table and database states. */ override lazy val catalog = { - new HiveSessionCatalog(ctx.hiveCatalog, ctx.metadataHive, ctx, functionRegistry, conf) + new HiveSessionCatalog( + ctx.hiveCatalog, + ctx.metadataHive, + ctx, + ctx.functionResourceLoader, + functionRegistry, + conf) } /** @@ -95,17 +101,4 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) } } } - - /** - * Loads resource to SQLContext. 
- */ - override def loadResource(resource: Resource): Unit = { - resource.resourceType.toLowerCase match { - case "jar" => - super.loadResource(resource) - case _ => - ctx.runSqlHive(s"ADD FILE ${resource.path}") - super.loadResource(resource) - } - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala index fda52f1f69625..88a266457aaf9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUDFs.scala @@ -40,7 +40,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.hive.client.HiveClientImpl -import org.apache.spark.sql.internal.Resource import org.apache.spark.sql.types._ private[hive] case class HiveSimpleUDF( From 51b72dd028a73276ad374b9560223e27c60cbbd5 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 31 Mar 2016 22:25:02 -0700 Subject: [PATCH 18/38] SessionCatalog loads permanent functions instead of letting the function registry load them. --- .../catalyst/analysis/FunctionRegistry.scala | 3 ++ .../sql/catalyst/catalog/SessionCatalog.scala | 39 +++++++++++++---- .../sql/catalyst/catalog/interface.scala | 1 + .../spark/sql/catalyst/identifiers.scala | 16 ++++---- .../spark/sql/execution/command/ddl.scala | 9 +++++ 5 files changed, 53 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d4300c9479e8a..90d66cea29cf5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -52,6 +52,9 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean + + /** Checks if a function with a given name exists. */ + def functionExists(name: String): Boolean = lookupFunction(name).isDefined } class SimpleFunctionRegistry extends FunctionRegistry { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index f6f48aff62c67..e217e74f0b21a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -73,6 +73,7 @@ class SessionCatalog( /** * Format table name, taking into account case sensitivity. */ + // TODO: Should we use it for temp function name? protected[this] def formatTableName(name: String): String = { if (conf.caseSensitiveAnalysis) name else name.toLowerCase } @@ -457,6 +458,7 @@ class SessionCatalog( * If a database is specified in `name`, this will return the function in that database. * If no database is specified, this will return the function in the current database. */ + // TODO: have a better name. This method is actually for fetching the metadata of a function.
def getFunction(name: FunctionIdentifier): CatalogFunction = { val db = name.database.getOrElse(currentDb) externalCatalog.getFunction(db, name.funcName) @@ -467,11 +469,17 @@ class SessionCatalog( * */ def functionExists(name: FunctionIdentifier): Boolean = { - try { - getFunction(name) != null - } catch { - case _: NoSuchFunctionException => false - case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. + if (functionRegistry.functionExists(name.unquotedString)) { + // This function exists in the FunctionRegistry. + true + } else { + // Need to check if this function exists in the metastore. + try { + getFunction(name) != null + } catch { + case _: NoSuchFunctionException => false + case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. + } } } @@ -541,9 +549,24 @@ class SessionCatalog( * Note: This is currently only used for temporary functions. */ def lookupFunction(name: String, children: Seq[Expression]): Expression = { - // TODO: if the function is not in function registry. It needs to load the function from - // the external catalog and loads resources. - functionRegistry.lookupFunction(name, children) + // TODO: Right now, the name can be qualified or not qualified. + // It will be better to get a FunctionIdentifier. + // TODO: Right now, we assume that name is not qualified! + val qualifiedName = FunctionIdentifier(name, Some(currentDb)).unquotedString + if (functionRegistry.functionExists(name)) { + functionRegistry.lookupFunction(name, children) + } else if (functionRegistry.functionExists(qualifiedName)) { + functionRegistry.lookupFunction(qualifiedName, children) + } else { + // The function has not been loaded to the function registry, which means + // that the function is a permanent function. + val catalogFunction = externalCatalog.getFunction(currentDb, name) + assert(qualifiedName == catalogFunction.identifier.unquotedString) + loadFunctionResources(catalogFunction.resources) + val info = new ExpressionInfo(catalogFunction.className, qualifiedName) + val builder = makeFunctionBuilder(qualifiedName, catalogFunction.className) + createTempFunction(qualifiedName, info, builder, ignoreIfExists = false) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index f43d9e6d85000..90906055c3c9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -159,6 +159,7 @@ abstract class ExternalCatalog { */ def alterFunction(db: String, funcDefinition: CatalogFunction): Unit + // TODO: getFunction should take a FunctionIdentifier. 
def getFunction(db: String, funcName: String): CatalogFunction def listFunctions(db: String, pattern: String): Seq[String] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala index 87f4d1b007fe1..aae75956ea61a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/identifiers.scala @@ -25,10 +25,10 @@ package org.apache.spark.sql.catalyst * Format (quoted): "`name`" or "`db`.`name`" */ sealed trait IdentifierWithDatabase { - val name: String + val identifier: String def database: Option[String] - def quotedString: String = database.map(db => s"`$db`.`$name`").getOrElse(s"`$name`") - def unquotedString: String = database.map(db => s"$db.$name").getOrElse(name) + def quotedString: String = database.map(db => s"`$db`.`$identifier`").getOrElse(s"`$identifier`") + def unquotedString: String = database.map(db => s"$db.$identifier").getOrElse(identifier) override def toString: String = quotedString } @@ -36,13 +36,15 @@ sealed trait IdentifierWithDatabase { /** * Identifies a table in a database. * If `database` is not defined, the current database is used. + * When we register a permanent function in the FunctionRegistry, we use + * unquotedString as the function name. */ case class TableIdentifier(table: String, database: Option[String]) extends IdentifierWithDatabase { - override val name: String = table + override val identifier: String = table - def this(name: String) = this(name, None) + def this(table: String) = this(table, None) } @@ -58,9 +60,9 @@ object TableIdentifier { case class FunctionIdentifier(funcName: String, database: Option[String]) extends IdentifierWithDatabase { - override val name: String = funcName + override val identifier: String = funcName - def this(name: String) = this(name, None) + def this(funcName: String) = this(funcName, None) } object FunctionIdentifier { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 0a842adaea733..b79c53709bc4e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -183,6 +183,7 @@ case class DescribeDatabase( * is referenced for the first time by a session. * isTemp: indicates if it is a temporary function. */ +// TODO: Use Seq[FunctionResource] instead of Seq[(String, String)] for resources. case class CreateFunction( databaseName: Option[String], functionName: String, @@ -195,6 +196,13 @@ case class CreateFunction( val func = FunctionIdentifier(functionName, databaseName) val catalogFunc = CatalogFunction(func, className, resources) if (isTemp) { + if (databaseName.isDefined) { + throw new AnalysisException( + s"It is not allowed to provide a database name when defining a temporary function. " + + s"However, database name ${databaseName.get} is provided.") + } + // We can overwrite the temp function definition.
+ sqlContext.sessionState.catalog.loadFunctionResources(resources) val info = new ExpressionInfo(className, functionName) val builder = sqlContext.sessionState.catalog.makeFunctionBuilder(functionName, className) @@ -207,6 +215,7 @@ case class CreateFunction( throw new AnalysisException( s"Function '$functionName' already exists in database '$dbName'.") } + // This function will be loaded into the FunctionRegistry when a query uses it. sqlContext.sessionState.catalog.createFunction(catalogFunc) } Seq.empty[Row] From 66c5261a5638dd05322b5b1a45af9b591a328233 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 10:02:15 -0700 Subject: [PATCH 19/38] Take care UnresolvedGenerator and cleanup. --- .../sql/catalyst/analysis/Analyzer.scala | 13 +++++++-- .../sql/catalyst/analysis/unresolved.scala | 28 +++++++++++++++++-- .../sql/catalyst/catalog/SessionCatalog.scala | 9 ++++++ .../sql/catalyst/catalog/interface.scala | 1 + .../sql/catalyst/analysis/AnalysisTest.scala | 2 +- .../analysis/DecimalPrecisionSuite.scala | 2 +- .../BooleanSimplificationSuite.scala | 1 - .../optimizer/EliminateSortsSuite.scala | 2 +- .../spark/sql/internal/SessionState.scala | 2 +- .../spark/sql/hive/HiveSessionState.scala | 4 +-- .../sql/hive/execution/HiveSqlParser.scala | 18 +++++------- 11 files changed, 59 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 02747144ad6f9..7c481386156ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -49,7 +49,6 @@ object SimpleAnalyzer class SimpleAnalyzer(functionRegistry: FunctionRegistry, conf: CatalystConf) extends Analyzer( new SessionCatalog(new InMemoryCatalog, functionRegistry, conf), - functionRegistry, conf) /** @@ -59,7 +58,6 @@ class SimpleAnalyzer(functionRegistry: FunctionRegistry, conf: CatalystConf) */ class Analyzer( catalog: SessionCatalog, - registry: FunctionRegistry, conf: CatalystConf, maxIterations: Int = 100) extends RuleExecutor[LogicalPlan] with CheckAnalysis { @@ -787,9 +785,18 @@ class Analyzer( case q: LogicalPlan => q transformExpressions { case u if !u.childrenResolved => u // Skip until children are resolved. + case u @ UnresolvedGenerator(name, children) => + withPosition(u) { + catalog.lookupFunction(name, children) match { + case generator: Generator => generator + case other => + failAnalysis(s"$name is expected to be a generator. However, " + + s"its class is ${other.getClass.getCanonicalName}, which is not a generator.") + } + } case u @ UnresolvedFunction(name, children, isDistinct) => withPosition(u) { - registry.lookupFunction(name, children) match { + catalog.lookupFunction(name, children) match { // DISTINCT is not meaningful for a Max or a Min. 
case max: Max if isDistinct => AggregateExpression(max, Complete, isDistinct = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index e73d367a730e4..18efc36349aeb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{errors, TableIdentifier} +import org.apache.spark.sql.catalyst.{InternalRow, errors, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback +import org.apache.spark.sql.catalyst.expressions.codegen.{ExprCode, CodegenContext, CodegenFallback} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.quoteIdentifier @@ -133,6 +133,30 @@ object UnresolvedAttribute { } } +case class UnresolvedGenerator( + name: String, + children: Seq[Expression]) extends Generator { + + override def elementTypes: Seq[(DataType, Boolean, String)] = + throw new UnresolvedException(this, "elementTypes") + override def dataType: DataType = throw new UnresolvedException(this, "dataType") + override def foldable: Boolean = throw new UnresolvedException(this, "foldable") + override def nullable: Boolean = throw new UnresolvedException(this, "nullable") + override lazy val resolved = false + + override def prettyName: String = name + override def toString: String = s"'$name(${children.mkString(", ")})" + + override def eval(input: InternalRow = null): TraversableOnce[InternalRow] = + throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + + override protected def genCode(ctx: CodegenContext, ev: ExprCode): String = + throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") + + override def terminate(): TraversableOnce[InternalRow] = + throw new UnsupportedOperationException(s"Cannot evaluate expression: $this") +} + case class UnresolvedFunction( name: String, children: Seq[Expression], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index e217e74f0b21a..890e164c7c814 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -433,6 +433,12 @@ class SessionCatalog( */ def dropFunction(name: FunctionIdentifier): Unit = { val db = name.database.getOrElse(currentDb) + val qualified = name.copy(database = Some(db)).unquotedString + if (functionRegistry.functionExists(qualified)) { + // If we have loaded this function into FunctionRegistry, + // also drop it from there. + functionRegistry.dropFunction(qualified) + } externalCatalog.dropFunction(db, name.funcName) } @@ -560,12 +566,15 @@ class SessionCatalog( } else { // The function has not been loaded to the function registry, which means // that the function is a permanent function. + // We need to first put the function in FunctionRegistry. 
val catalogFunction = externalCatalog.getFunction(currentDb, name) assert(qualifiedName == catalogFunction.identifier.unquotedString) loadFunctionResources(catalogFunction.resources) val info = new ExpressionInfo(catalogFunction.className, qualifiedName) val builder = makeFunctionBuilder(qualifiedName, catalogFunction.className) createTempFunction(qualifiedName, info, builder, ignoreIfExists = false) + // Now, we need to create the Expression. + functionRegistry.lookupFunction(qualifiedName, children) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 90906055c3c9a..cf57bc27f1a21 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -146,6 +146,7 @@ abstract class ExternalCatalog { def createFunction(db: String, funcDefinition: CatalogFunction): Unit + // TODO: dropFunction should also take a FunctionIdentifier. def dropFunction(db: String, funcName: String): Unit def renameFunction(db: String, oldName: String, newName: String): Unit diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 3ec95ef5b5c6c..b1fcf011f43e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -32,7 +32,7 @@ trait AnalysisTest extends PlanTest { val conf = new SimpleCatalystConf(caseSensitive) val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) catalog.createTempTable("TaBlE", TestRelations.testRelation, overrideIfExists = true) - new Analyzer(catalog, EmptyFunctionRegistry, conf) { + new Analyzer(catalog, conf) { override val extendedResolutionRules = EliminateSubqueryAliases :: Nil } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala index 1a350bf847994..b3b1f5b920a53 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecisionSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.types._ class DecimalPrecisionSuite extends PlanTest with BeforeAndAfter { private val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true) private val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - private val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf) + private val analyzer = new Analyzer(catalog, conf) private val relation = LocalRelation( AttributeReference("i", IntegerType)(), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala index dd6b5cac28fa2..8147d06969bbe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BooleanSimplificationSuite.scala @@ -141,7 +141,6 @@ class BooleanSimplificationSuite extends PlanTest with 
PredicateHelper { private val caseInsensitiveConf = new SimpleCatalystConf(false) private val caseInsensitiveAnalyzer = new Analyzer( new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, caseInsensitiveConf), - EmptyFunctionRegistry, caseInsensitiveConf) test("(a && b) || (a && c) => a && (b || c) when case insensitive") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 009889d5a1f9d..8c92ad82ac5be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.rules._ class EliminateSortsSuite extends PlanTest { val conf = new SimpleCatalystConf(caseSensitiveAnalysis = true, orderByOrdinal = false) val catalog = new SessionCatalog(new InMemoryCatalog, EmptyFunctionRegistry, conf) - val analyzer = new Analyzer(catalog, EmptyFunctionRegistry, conf) + val analyzer = new Analyzer(catalog, conf) object Optimize extends RuleExecutor[LogicalPlan] { val batches = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 8f663d9674b81..c23d680afe027 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -67,7 +67,7 @@ private[sql] class SessionState(ctx: SQLContext) { * Logical query plan analyzer for resolving unresolved attributes and relations. */ lazy val analyzer: Analyzer = { - new Analyzer(catalog, functionRegistry, conf) { + new Analyzer(catalog, conf) { override val extendedResolutionRules = python.ExtractPythonUDFs :: PreInsertCastAndRename :: diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 36ebde3afc29d..7077b0ffac072 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -52,7 +52,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) * An analyzer that uses the Hive metastore. */ override lazy val analyzer: Analyzer = { - new Analyzer(catalog, functionRegistry, conf) { + new Analyzer(catalog, conf) { override val extendedResolutionRules = catalog.ParquetConversions :: catalog.CreateTables :: @@ -69,7 +69,7 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) /** * Parser for HiveQl query texts. */ - override lazy val sqlParser: ParserInterface = new HiveSqlParser(functionRegistry) + override lazy val sqlParser: ParserInterface = HiveSqlParser /** * Planner that takes into account Hive-specific strategies. 
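The HiveSqlParser hunks that follow stop resolving generator functions at parse time: the parser now emits an UnresolvedGenerator placeholder, and the Analyzer later asks the SessionCatalog to turn it into a concrete Generator. A rough sketch of that hand-off is below; `makeGenerator` and `resolveGenerator` are illustrative helper names for this summary, not methods added by the patch, and the code is not meant to be applied as part of the diff.

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator
import org.apache.spark.sql.catalyst.catalog.SessionCatalog
import org.apache.spark.sql.catalyst.expressions.{Expression, Generator}

// Parse time: only record the generator's name and arguments; no lookup happens yet.
def makeGenerator(name: String, args: Seq[Expression]): Generator =
  UnresolvedGenerator(name, args)

// Analysis time: the catalog, which knows about temporary, built-in and Hive functions,
// resolves the placeholder into a concrete Generator or fails the analysis.
def resolveGenerator(u: UnresolvedGenerator, catalog: SessionCatalog): Generator =
  catalog.lookupFunction(u.name, u.children) match {
    case g: Generator => g
    case other =>
      throw new AnalysisException(
        s"${u.name} is expected to be a generator, but its class is ${other.getClass.getName}.")
  }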
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index c2e10dd80cef2..72a9b44f9e854 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -16,19 +16,19 @@ */ package org.apache.spark.sql.hive.execution +import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator + import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry} import org.apache.hadoop.hive.ql.parse.EximUtil import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry -import org.apache.spark.sql.catalyst.catalog.{CatalogColumn, CatalogStorageFormat, CatalogTable, CatalogTableType} +import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ @@ -41,8 +41,8 @@ import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper /** * Concrete parser for HiveQl statements. */ -class HiveSqlParser(functionRegistry: FunctionRegistry) extends AbstractSqlParser { - val astBuilder = new HiveSqlAstBuilder(functionRegistry) +object HiveSqlParser extends AbstractSqlParser { + val astBuilder = new HiveSqlAstBuilder override protected def nativeCommand(sqlText: String): LogicalPlan = { HiveNativeCommand(sqlText) @@ -52,7 +52,7 @@ class HiveSqlParser(functionRegistry: FunctionRegistry) extends AbstractSqlParse /** * Builder that converts an ANTLR ParseTree into a LogicalPlan/Expression/TableIdentifier. */ -class HiveSqlAstBuilder(functionRegistry: FunctionRegistry) extends SparkSqlAstBuilder { +class HiveSqlAstBuilder extends SparkSqlAstBuilder { import ParserUtils._ /** @@ -273,11 +273,7 @@ class HiveSqlAstBuilder(functionRegistry: FunctionRegistry) extends SparkSqlAstB name: String, expressions: Seq[Expression], ctx: LateralViewContext): Generator = { - val func = functionRegistry.lookupFunction(name, expressions) - func match { - case g: Generator => g - case _ => throw new ParseException(s"Couldn't find Generator function '$name'", ctx) - } + UnresolvedGenerator(name, expressions) } /** From 2aa3725dd42c3e62e7326ba87ea411be0d308569 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 13:25:26 -0700 Subject: [PATCH 20/38] If we cannot find a function in Spark's FunctionRegistry, we will also try Hive's.
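In outline, the lookup order this patch introduces is: try Spark's own FunctionRegistry first; if that fails and the name is not registered there, fall back to Hive's built-in registry, register a builder for the resolved class, and retry the lookup. A condensed sketch of that control flow is below; `sparkRegistry` and `registerHiveBuiltin` are illustrative placeholders (standing in for the patch's functionRegistry and its makeFunctionBuilder/createTempFunction sequence), not names used by the patch itself.

import scala.util.{Failure, Success, Try}
import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry}
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.Expression

// Sketch only: resolve `name`, falling back to Hive's built-in function registry.
def resolve(name: String, children: Seq[Expression]): Expression =
  Try(sparkRegistry.lookupFunction(name, children)) match {
    case Success(expr) => expr
    case Failure(error) if sparkRegistry.functionExists(name) =>
      // The function is registered but the given children are invalid; keep the original error.
      throw error
    case Failure(_) =>
      val functionName = name.toLowerCase
      val info = Option(HiveFunctionRegistry.getFunctionInfo(functionName))
        .getOrElse(throw new AnalysisException(s"Undefined Hive UDF: $name"))
      // Register a builder for the Hive class, then retry through the Spark registry.
      registerHiveBuiltin(functionName, info.getFunctionClass.getName)
      sparkRegistry.lookupFunction(functionName, children)
  }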
--- .../catalyst/analysis/FunctionRegistry.scala | 4 +- .../sql/catalyst/catalog/SessionCatalog.scala | 63 ++++++++++--------- .../spark/sql/hive/HiveSessionCatalog.scala | 50 +++++++++++++-- .../sql/hive/execution/HiveQuerySuite.scala | 6 +- .../sql/hive/execution/HiveUDFSuite.scala | 10 +-- 5 files changed, 90 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 90d66cea29cf5..b120bd25fb34f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -341,7 +341,9 @@ object FunctionRegistry { val builtin: SimpleFunctionRegistry = { val fr = new SimpleFunctionRegistry - expressions.foreach { case (name, (info, builder)) => fr.registerFunction(name, info, builder) } + expressions.foreach { + case (name, (info, builder)) => fr.registerFunction(name, info, builder) + } fr } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 890e164c7c814..11127334e53b1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -493,6 +493,38 @@ class SessionCatalog( // | Methods that interact with temporary and metastore functions | // ---------------------------------------------------------------- + + /** + * Return a temporary function. For testing only. + */ + private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = { + // TODO: Why do we need this? + functionRegistry.lookupFunctionBuilder(name) + } + + /** + * Construct a [[FunctionBuilder]] based on the provided class that represents a function. + * + * This performs reflection to decide what type of [[Expression]] to return in the builder. + * This is useful for creating temporary functions. + */ + private[sql] def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { + // TODO: at least support UDAFs here + throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") + } + + /** + * Loads resources such as JARs and Files to SQLContext. + */ + def loadFunctionResources(resources: Seq[(String, String)]): Unit = { + resources.map(r => (r._1.toLowerCase, r._2)).foreach { + case (resourceType, uri) => + val functionResource = + FunctionResource(FunctionResourceType.fromString(resourceType), uri) + functionResourceLoader.loadResource(functionResource) + } + } + /** * Create a temporary function. * This assumes no database is specified in `funcDefinition`. @@ -590,35 +622,4 @@ class SessionCatalog( .map { f => FunctionIdentifier(f) } dbFunctions ++ _tempFunctions } - - /** - * Return a temporary function. For testing only. - */ - private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = { - // TODO: Why do we need this? - functionRegistry.lookupFunctionBuilder(name) - } - - /** - * Construct a [[FunctionBuilder]] based on the provided class that represents a function. - * - * This performs reflection to decide what type of [[Expression]] to return in the builder. - * This is useful for creating temporary functions. 
- */ - def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { - // TODO: at least support UDAFs here - throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") - } - - /** - * Loads resources such as JARs and Files to SQLContext. - */ - def loadFunctionResources(resources: Seq[(String, String)]): Unit = { - resources.map(r => (r._1.toLowerCase, r._2)).foreach { - case (resourceType, uri) => - val functionResource = - FunctionResource(FunctionResourceType.fromString(resourceType), uri) - functionResourceLoader.loadResource(functionResource) - } - } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 1797f5a767d5f..76be5d9f2ba48 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -17,18 +17,20 @@ package org.apache.spark.sql.hive +import scala.util.{Failure, Success, Try} import scala.util.control.NonFatal import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.exec.{UDAF, UDF} -import org.apache.hadoop.hive.ql.udf.generic.{GenericUDTF, AbstractGenericUDAFResolver, GenericUDF} +import org.apache.hadoop.hive.ql.udf.generic.{GenericUDFMacro, GenericUDTF, AbstractGenericUDAFResolver, GenericUDF} import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog} -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.{ExpressionInfo, Expression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.AnalysisException @@ -159,17 +161,57 @@ class HiveSessionCatalog( udtf.elementTypes // Force it to check input data types. udtf } else { - throw new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}'") + throw new AnalysisException(s"No handler for Hive UDF '${clazz.getCanonicalName}'") } } catch { case ae: AnalysisException => throw ae case NonFatal(e) => val analysisException = - new AnalysisException(s"No handler for UDF '${clazz.getCanonicalName}': $e") + new AnalysisException(s"No handler for Hive UDF '${clazz.getCanonicalName}': $e") analysisException.setStackTrace(e.getStackTrace) throw analysisException } } } + + // We have a list of Hive built-in functions that we do not support. So, we will check + // Hive's function registry and lazily load needed functions into our own function registry. 
+ // Those Hive built-in functions are + // assert_true, collect_list, collect_set, compute_stats, context_ngrams, create_union, + // current_user ,elt, ewah_bitmap, ewah_bitmap_and, ewah_bitmap_empty, ewah_bitmap_or, field, + // histogram_numeric, in_file, index, inline, java_method, map_keys, map_values, + // matchpath, ngrams, noop, noopstreaming, noopwithmap, noopwithmapstreaming, + // parse_url, parse_url_tuple, percentile, percentile_approx, posexplode, reflect, reflect2, + // regexp, sentences, stack, std, str_to_map, windowingtablefunction, xpath, xpath_boolean, + // xpath_double, xpath_float, xpath_int, xpath_long, xpath_number, + // xpath_short, and xpath_string. + override def lookupFunction(name: String, children: Seq[Expression]): Expression = { + Try(super.lookupFunction(name, children)) match { + case Success(expr) => expr + case Failure(error) => + if (functionRegistry.functionExists(name)) { + // If the function actually exists in functionRegistry, it means that there is an + // error when we create the Expression using the given children. + // We need to throw the original exception. + throw error + } else { + // This function is not in functionRegistry, let's try to load it as a Hive's + // built-in function. + val functionName = name.toLowerCase + // TODO: This may not really work for current_user because current_user is not evaluated + // with session info. + val functionInfo = + Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse( + throw new AnalysisException(s"Undefined Hive UDF: $name")) + val className = functionInfo.getFunctionClass.getName + val builder = makeFunctionBuilder(functionName, className) + // Put this Hive built-in function to our function registry. + val info = new ExpressionInfo(className, functionName) + createTempFunction(functionName, info, builder, ignoreIfExists = false) + // Now, we need to create the Expression. 
+ functionRegistry.lookupFunction(functionName, children) + } + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 01e8dd7b092aa..8a9188b5b7e44 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1230,14 +1230,16 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { val e = intercept[AnalysisException] { range(1).selectExpr("not_a_udf()") } - assert(e.getMessage.contains("undefined function not_a_udf")) + assert(e.getMessage.contains("Undefined Hive UDF")) + assert(e.getMessage.contains("not_a_udf")) var success = false val t = new Thread("test") { override def run(): Unit = { val e = intercept[AnalysisException] { range(1).selectExpr("not_a_udf()") } - assert(e.getMessage.contains("undefined function not_a_udf")) + assert(e.getMessage.contains("Undefined Hive UDF")) + assert(e.getMessage.contains("not_a_udf")) success = true } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index b0e263dff986a..d07ac56586744 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -303,7 +303,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { val message = intercept[AnalysisException] { sql("SELECT testUDFTwoListList() FROM testUDF") }.getMessage - assert(message.contains("No handler for Hive udf")) + assert(message.contains("No handler for Hive UDF")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFTwoListList") } @@ -313,7 +313,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { val message = intercept[AnalysisException] { sql("SELECT testUDFAnd() FROM testUDF") }.getMessage - assert(message.contains("No handler for Hive udf")) + assert(message.contains("No handler for Hive UDF")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFAnd") } @@ -323,7 +323,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { val message = intercept[AnalysisException] { sql("SELECT testUDAFPercentile(a) FROM testUDF GROUP BY b") }.getMessage - assert(message.contains("No handler for Hive udf")) + assert(message.contains("No handler for Hive UDF")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDAFPercentile") } @@ -333,7 +333,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { val message = intercept[AnalysisException] { sql("SELECT testUDAFAverage() FROM testUDF GROUP BY b") }.getMessage - assert(message.contains("No handler for Hive udf")) + assert(message.contains("No handler for Hive UDF")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDAFAverage") } @@ -343,7 +343,7 @@ class HiveUDFSuite extends QueryTest with TestHiveSingleton with SQLTestUtils { val message = intercept[AnalysisException] { sql("SELECT testUDTFExplode() FROM testUDF") }.getMessage - assert(message.contains("No handler for Hive udf")) + assert(message.contains("No handler for Hive UDF")) sql("DROP TEMPORARY FUNCTION IF EXISTS testUDTFExplode") } From 81543596860c8ec45fc9fd54154501c8690e2cfd Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 14:52:38 -0700 Subject: [PATCH 21/38] Fix tests and 
pre-load two of Hive's built-in functions. --- .../execution/HiveCompatibilitySuite.scala | 7 ++-- .../spark/sql/hive/HiveSessionCatalog.scala | 34 ++++++++++++++++--- .../sql/hive/execution/SQLQuerySuite.scala | 21 ++++++------ 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index d8695bc5db0b3..05917ccd5bb2a 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -363,7 +363,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Index commands are not supported "drop_index", "drop_index_removes_partition_dirs", - "alter_index" + "alter_index", + + // We do not support macros for now. + // TODO: throw an exception if a user is trying to create a macro. + "macro" ) /** @@ -733,7 +737,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "load_file_with_space_in_the_name", "loadpart1", "louter_join_ppr", - "macro", "mapjoin_distinct", "mapjoin_filter_on_outerjoin", "mapjoin_mapjoin", diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 813006628d2ba..69fcaed67a3d9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils -class HiveSessionCatalog( +private[sql] class HiveSessionCatalog( externalCatalog: HiveExternalCatalog, client: HiveClient, context: HiveContext, @@ -202,9 +202,19 @@ class HiveSessionCatalog( val functionName = name.toLowerCase // TODO: This may not really work for current_user because current_user is not evaluated // with session info. - val functionInfo = - Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse( - throw new AnalysisException(s"Undefined Hive UDF: $name")) + // We do not need to use executionHive here because we only load + // Hive's built-in functions, which do not need the current db. + val functionInfo = { + try { + Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse( + throw new AnalysisException(s"Undefined Hive UDF: $name")) + } catch { + // If HiveFunctionRegistry.getFunctionInfo throws an exception, + // we are failing to load a Hive built-in function, which means that + // the given function is not a Hive built-in function. + case NonFatal(e) => throw new AnalysisException(s"Undefined Hive UDF: $name") + } + } val className = functionInfo.getFunctionClass.getName val builder = makeFunctionBuilder(functionName, className) // Put this Hive built-in function to our function registry. @@ -215,4 +225,20 @@ class HiveSessionCatalog( } } } + + // Pre-load a few commonly used Hive built-in functions.
+ HiveSessionCatalog.preloadedHiveBuiltinFunctions.foreach { + case (functionName, clazz) => + val builder = makeFunctionBuilder(functionName, clazz) + val info = new ExpressionInfo(clazz.getCanonicalName, functionName) + createTempFunction(functionName, info, builder, ignoreIfExists = false) + } +} + +private[sql] object HiveSessionCatalog { + // This is the list of Hive's built-in functions that are commonly used and we want to + // pre-load when we create the FunctionRegistry. + val preloadedHiveBuiltinFunctions = + ("collect_set", classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet]) :: + ("collect_list", classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectList]) :: Nil } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index dfec7750576ec..62763c2c86d07 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -108,10 +108,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { Row(3) :: Row(3) :: Nil) } finally { sql("DROP FUNCTION udtf_count_temp") - val errMsg = intercept[ParseException] { + val errMsg = intercept[AnalysisException] { sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc") }.getMessage - assert(errMsg.contains("undefined function udtf_count_temp")) + assert(errMsg.contains("Undefined Hive UDF")) + assert(errMsg.contains("udtf_count_temp")) } } @@ -199,9 +200,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("show functions") { - val allBuiltinFunctions = - (FunctionRegistry.builtin.listFunction().toSet[String] ++ - org.apache.hadoop.hive.ql.exec.FunctionRegistry.getFunctionNames.asScala).toList.sorted + val allBuiltinFunctions = FunctionRegistry.builtin.listFunction().toSet[String].toList.sorted // The TestContext is shared by all the test cases, some functions may be registered before // this, so we check that all the builtin functions are returned. val allFunctions = sql("SHOW functions").collect().map(r => r(0)) @@ -213,7 +212,8 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SHOW functions abc.abs"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) - checkAnswer(sql("SHOW functions `~`"), Row("~")) + // Re-enable this test after we fix SPARK-14335. + // checkAnswer(sql("SHOW functions `~`"), Row("~")) checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) checkAnswer(sql("SHOW functions `weekofyea.*`"), Row("weekofyear")) // this probably will failed if we add more function with `sha` prefixing. @@ -241,10 +241,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkExistence(sql("describe functioN abcadf"), true, "Function: abcadf not found.") - checkExistence(sql("describe functioN `~`"), true, - "Function: ~", - "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot", - "Usage: ~ n - Bitwise not") + // Re-enable this test after we fix SPARK-14335. 
+ // checkExistence(sql("describe functioN `~`"), true, + // "Function: ~", + // "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot", + // "Usage: ~ n - Bitwise not") } test("SPARK-5371: union with null and sum") { From 04a59263664df573565e287381c8fd1dcad59197 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 14:58:38 -0700 Subject: [PATCH 22/38] Cleanup --- .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 4 ++-- .../org/apache/spark/sql/execution/SparkSqlParser.scala | 1 - .../scala/org/apache/spark/sql/execution/command/ddl.scala | 3 +-- .../org/apache/spark/sql/hive/HiveSessionCatalog.scala | 6 +++--- .../org/apache/spark/sql/hive/execution/HiveSqlParser.scala | 3 +-- .../org/apache/spark/sql/hive/execution/SQLQuerySuite.scala | 4 ++-- 6 files changed, 9 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 18efc36349aeb..36c5ea404364b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -18,9 +18,9 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.{InternalRow, errors, TableIdentifier} +import org.apache.spark.sql.catalyst.{errors, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{ExprCode, CodegenContext, CodegenFallback} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan} import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.util.quoteIdentifier diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 832665697011c..efa7ddf2ab7a5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -304,7 +304,6 @@ class SparkSqlAstBuilder extends AstBuilder { case "jar" | "file" | "archive" => resourceType -> string(resource.STRING) case other => - // We keep this case because the error message is better than ResourceType.fromString throw new ParseException(s"Resource Type '$resourceType' is not supported.", ctx) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index b79c53709bc4e..06722c17e2a80 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -20,8 +20,7 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Row, SQLContext} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException -import org.apache.spark.sql.catalyst.catalog.{FunctionResource, CatalogDatabase, CatalogFunction} +import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogFunction} import 
org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExpressionInfo} import org.apache.spark.sql.execution.datasources.BucketSpec diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 69fcaed67a3d9..952459243015f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -21,16 +21,16 @@ import scala.util.{Failure, Success, Try} import scala.util.control.NonFatal import org.apache.hadoop.fs.Path -import org.apache.hadoop.hive.ql.exec.{UDAF, UDF} -import org.apache.hadoop.hive.ql.udf.generic.{GenericUDFMacro, GenericUDTF, AbstractGenericUDAFResolver, GenericUDF} import org.apache.hadoop.hive.conf.HiveConf +import org.apache.hadoop.hive.ql.exec.{UDAF, UDF} import org.apache.hadoop.hive.ql.exec.{FunctionRegistry => HiveFunctionRegistry} +import org.apache.hadoop.hive.ql.udf.generic.{AbstractGenericUDAFResolver, GenericUDF, GenericUDTF} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.catalog.{FunctionResourceLoader, SessionCatalog} -import org.apache.spark.sql.catalyst.expressions.{ExpressionInfo, Expression} +import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.AnalysisException diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 72a9b44f9e854..8efa0466a415f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.hive.execution -import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator - import scala.collection.JavaConverters._ import org.antlr.v4.runtime.{ParserRuleContext, Token} @@ -28,6 +26,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.parser._ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 62763c2c86d07..94ee7dfef0ddc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -212,7 +212,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SHOW functions abc.abs"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) checkAnswer(sql("SHOW functions `abc`.`abs`"), Row("abs")) - // Re-enable this test after we fix SPARK-14335. 
+ // TODO: Re-enable this test after we fix SPARK-14335. // checkAnswer(sql("SHOW functions `~`"), Row("~")) checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) checkAnswer(sql("SHOW functions `weekofyea.*`"), Row("weekofyear")) @@ -241,7 +241,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkExistence(sql("describe functioN abcadf"), true, "Function: abcadf not found.") - // Re-enable this test after we fix SPARK-14335. + // TODO: Re-enable this test after we fix SPARK-14335. // checkExistence(sql("describe functioN `~`"), true, // "Function: ~", // "Class: org.apache.hadoop.hive.ql.udf.UDFOPBitNot", From 979b03e16c4853d255401dfb2b104bfe10cdcb64 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 15:25:40 -0700 Subject: [PATCH 23/38] Fix test. --- sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index fd736718af12c..1cfd048ec7278 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -83,7 +83,7 @@ class UDFSuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException] { df.selectExpr("a_function_that_does_not_exist()") } - assert(e.getMessage.contains("undefined function")) + assert(e.getMessage.contains("Function not found")) } test("Simple UDF") { From 323cea6541c23b0610538cdae6b5f7e0747f442b Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Fri, 1 Apr 2016 22:51:43 -0700 Subject: [PATCH 24/38] Remove unnecessary import --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index fc542609aad98..ecf210d17f278 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -24,7 +24,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, ScalaReflection, SimpleCatalystConf} -import org.apache.spark.sql.catalyst.catalog.{DummyFunctionResourceLoader, InMemoryCatalog, SessionCatalog} +import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ From 3718e613498cbf9a996f52bc3f215f1051f2ae51 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sat, 2 Apr 2016 16:34:19 -0700 Subject: [PATCH 25/38] Cleanup #1 --- .../catalyst/analysis/FunctionRegistry.scala | 40 +++---- .../sql/catalyst/analysis/unresolved.scala | 9 +- .../sql/catalyst/catalog/SessionCatalog.scala | 59 +--------- .../catalyst/catalog/functionResources.scala | 4 + .../catalog/SessionCatalogSuite.scala | 104 +++--------------- .../spark/sql/hive/HiveSessionState.scala | 1 + .../sql/hive/test/TestHiveSingleton.scala | 1 - 7 files changed, 42 insertions(+), 176 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 330e761a5f4b5..28f50dae36844 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.util.StringKeyHashMap -import org.apache.spark.util.Utils /** A catalog for looking up user defined functions, used by an [[Analyzer]]. */ @@ -70,14 +69,9 @@ class SimpleFunctionRegistry extends FunctionRegistry { } override def lookupFunction(name: String, children: Seq[Expression]): Expression = { - val builder = functionBuilders.get(name) - if (builder.isEmpty) { - throw new AnalysisException(s"undefined function $name") - } val func = synchronized { - Try(builder.map(_._2)) match { - case Success(e) => e.get - case Failure(e) => throw new AnalysisException(e.getMessage) + functionBuilders.get(name).map(_._2).getOrElse { + throw new AnalysisException(s"undefined function $name") } } func(children) @@ -342,18 +336,22 @@ object FunctionRegistry { val builtin: SimpleFunctionRegistry = { val fr = new SimpleFunctionRegistry - expressions.foreach { - case (name, (info, builder)) => fr.registerFunction(name, info, builder) - } + expressions.foreach { case (name, (info, builder)) => fr.registerFunction(name, info, builder) } fr } + /** See usage above. */ + def expression[T <: Expression](name: String) + (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { + expression(name, tag.runtimeClass.asInstanceOf[Class[T]]) + } + def expression[T <: Expression]( name: String, - runtimeClass: Class[T]): (String, (ExpressionInfo, FunctionBuilder)) = { + clazz: Class[T]): (String, (ExpressionInfo, FunctionBuilder)) = { // See if we can find a constructor that accepts Seq[Expression] - val varargCtor = Try(runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption - val builder: FunctionBuilder = (expressions: Seq[Expression]) => { + val varargCtor = Try(clazz.getDeclaredConstructor(classOf[Seq[_]])).toOption + val builder = (expressions: Seq[Expression]) => { if (varargCtor.isDefined) { // If there is an apply method that accepts Seq[Expression], use that one. Try(varargCtor.get.newInstance(expressions).asInstanceOf[Expression]) match { @@ -363,7 +361,7 @@ object FunctionRegistry { } else { // Otherwise, find an ctor method that matches the number of arguments, and use that. val params = Seq.fill(expressions.size)(classOf[Expression]) - val f = Try(runtimeClass.getDeclaredConstructor(params : _*)) match { + val f = Try(clazz.getDeclaredConstructor(params : _*)) match { case Success(e) => e case Failure(e) => @@ -376,19 +374,13 @@ object FunctionRegistry { } } - val df = runtimeClass.getAnnotation(classOf[ExpressionDescription]) + val df = clazz.getAnnotation(classOf[ExpressionDescription]) if (df != null) { (name, - (new ExpressionInfo(runtimeClass.getCanonicalName, name, df.usage(), df.extended()), + (new ExpressionInfo(clazz.getCanonicalName, name, df.usage(), df.extended()), builder)) } else { - (name, (new ExpressionInfo(runtimeClass.getCanonicalName, name), builder)) + (name, (new ExpressionInfo(clazz.getCanonicalName, name), builder)) } } - - /** See usage above. 
*/ - def expression[T <: Expression](name: String) - (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { - expression(name, tag.runtimeClass.asInstanceOf[Class[T]]) - } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 36c5ea404364b..e33c6a9cd55eb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -133,9 +133,12 @@ object UnresolvedAttribute { } } -case class UnresolvedGenerator( - name: String, - children: Seq[Expression]) extends Generator { +/** + * Represents an unresolved generator, which will be created by the parser for + * the [[org.apache.spark.sql.catalyst.plans.logical.Generate]] operator. + * The analyzer will resolve this generator. + */ +case class UnresolvedGenerator(name: String, children: Seq[Expression]) extends Generator { override def elementTypes: Seq[(DataType, Boolean, String)] = throw new UnresolvedException(this, "elementTypes") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 11127334e53b1..12518ba896eb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -73,7 +73,6 @@ class SessionCatalog( /** * Format table name, taking into account case sensitivity. */ - // TODO: Should we use it for temp function name? protected[this] def formatTableName(name: String): String = { if (conf.caseSensitiveAnalysis) name else name.toLowerCase } @@ -442,22 +441,6 @@ class SessionCatalog( externalCatalog.dropFunction(db, name.funcName) } - /** - * Alter a metastore function whose name that matches the one specified in `funcDefinition`. - * - * If no database is specified in `funcDefinition`, assume the function is in the - * current database. - * - * Note: If the underlying implementation does not support altering a certain field, - * this becomes a no-op. - */ - def alterFunction(funcDefinition: CatalogFunction): Unit = { - val db = funcDefinition.identifier.database.getOrElse(currentDb) - val newFuncDefinition = funcDefinition.copy( - identifier = FunctionIdentifier(funcDefinition.identifier.funcName, Some(db))) - externalCatalog.alterFunction(db, newFuncDefinition) - } - /** * Retrieve the metadata of a metastore function. * @@ -493,15 +476,6 @@ class SessionCatalog( // | Methods that interact with temporary and metastore functions | // ---------------------------------------------------------------- - - /** - * Return a temporary function. For testing only. - */ - private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = { - // TODO: Why do we need this? - functionRegistry.lookupFunctionBuilder(name) - } - /** * Construct a [[FunctionBuilder]] based on the provided class that represents a function. * @@ -517,10 +491,10 @@ class SessionCatalog( * Loads resources such as JARs and Files to SQLContext. 
*/ def loadFunctionResources(resources: Seq[(String, String)]): Unit = { - resources.map(r => (r._1.toLowerCase, r._2)).foreach { + resources.foreach { case (resourceType, uri) => val functionResource = - FunctionResource(FunctionResourceType.fromString(resourceType), uri) + FunctionResource(FunctionResourceType.fromString(resourceType.toLowerCase), uri) functionResourceLoader.loadResource(functionResource) } } @@ -553,35 +527,6 @@ class SessionCatalog( } } - /** - * Rename a function. - * - * If a database is specified in `oldName`, this will rename the function in that database. - * If no database is specified, this will first attempt to rename a temporary function with - * the same name, then, if that does not exist, rename the function in the current database. - * - * This assumes the database specified in `oldName` matches the one specified in `newName`. - */ - def renameFunction(oldName: FunctionIdentifier, newName: FunctionIdentifier): Unit = { - if (oldName.database != newName.database) { - throw new AnalysisException("rename does not support moving functions across databases") - } - val db = oldName.database.getOrElse(currentDb) - val oldBuilder = functionRegistry.lookupFunctionBuilder(oldName.funcName) - if (oldName.database.isDefined || oldBuilder.isEmpty) { - externalCatalog.renameFunction(db, oldName.funcName, newName.funcName) - } else { - val oldExpressionInfo = functionRegistry.lookupFunction(oldName.funcName).get - val newExpressionInfo = new ExpressionInfo( - oldExpressionInfo.getClassName, - newName.funcName, - oldExpressionInfo.getUsage, - oldExpressionInfo.getExtended) - functionRegistry.dropFunction(oldName.funcName) - functionRegistry.registerFunction(newName.funcName, newExpressionInfo, oldBuilder.get) - } - } - /** * Return an [[Expression]] that represents the specified function, assuming it exists. * Note: This is currently only used for temporary functions. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala index 4e3400584d0d1..eb4ec2b31b894 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -19,12 +19,16 @@ package org.apache.spark.sql.catalyst.catalog import org.apache.spark.sql.AnalysisException +/** An trait that represents the type of a resourced needed by a function. */ sealed trait FunctionResourceType object JarResource extends FunctionResourceType object FileResource extends FunctionResourceType +// We do nto allow users to specify a archive because it is YARN specific. +// When loading resources, we will throw an exception and ask users to +// use --archive with spark submit. 
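// A rough, hypothetical illustration of how these resource types get used
// (the function name, class, and jar path below are made up, not from the patch):
//   val func = CatalogFunction(
//     FunctionIdentifier("my_max", Some("db1")),
//     "com.example.udaf.MyMax",
//     ("jar" -> "/tmp/my-udfs.jar") :: Nil)
//   // On first lookup, SessionCatalog.loadFunctionResources maps each
//   // (resourceType, uri) pair to a FunctionResource, here
//   // FunctionResource(JarResource, "/tmp/my-udfs.jar"), and hands it to the
//   // session's FunctionResourceLoader; an "archive" entry is rejected with an
//   // error asking users to use --archive with spark submit, per the comment above.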
object ArchiveResource extends FunctionResourceType object FunctionResourceType { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index d85e1192f1e19..4d56d001b3e7d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -689,9 +689,13 @@ class SessionCatalogSuite extends SparkFunSuite { val info2 = new ExpressionInfo("tempFunc2", "temp2") catalog.createTempFunction("temp1", info1, tempFunc1, ignoreIfExists = false) catalog.createTempFunction("temp2", info2, tempFunc2, ignoreIfExists = false) - assert(catalog.getTempFunction("temp1") == Some(tempFunc1)) - assert(catalog.getTempFunction("temp2") == Some(tempFunc2)) - assert(catalog.getTempFunction("temp3") == None) + val arguments = Seq(Literal(1), Literal(2), Literal(3)) + assert(catalog.lookupFunction("temp1", arguments) === Literal(1)) + assert(catalog.lookupFunction("temp2", arguments) === Literal(3)) + // Temporary function does not exist. + intercept[AnalysisException] { + catalog.lookupFunction("temp3", arguments) + } val tempFunc3 = (e: Seq[Expression]) => Literal(e.size) val info3 = new ExpressionInfo("tempFunc3", "temp1") // Temporary function already exists @@ -700,7 +704,7 @@ class SessionCatalogSuite extends SparkFunSuite { } // Temporary function is overridden catalog.createTempFunction("temp1", info3, tempFunc3, ignoreIfExists = true) - assert(catalog.getTempFunction("temp1") == Some(tempFunc3)) + assert(catalog.lookupFunction("temp1", arguments) === Literal(arguments.length)) } test("drop function") { @@ -732,9 +736,12 @@ class SessionCatalogSuite extends SparkFunSuite { val info = new ExpressionInfo("tempFunc", "func1") val tempFunc = (e: Seq[Expression]) => e.head catalog.createTempFunction("func1", info, tempFunc, ignoreIfExists = false) - assert(catalog.getTempFunction("func1") == Some(tempFunc)) + val arguments = Seq(Literal(1), Literal(2), Literal(3)) + assert(catalog.lookupFunction("func1", arguments) === Literal(1)) catalog.dropTempFunction("func1", ignoreIfNotExists = false) - assert(catalog.getTempFunction("func1") == None) + intercept[AnalysisException] { + catalog.lookupFunction("func1", arguments) + } intercept[AnalysisException] { catalog.dropTempFunction("func1", ignoreIfNotExists = false) } @@ -774,91 +781,6 @@ class SessionCatalogSuite extends SparkFunSuite { } } - test("rename function") { - val externalCatalog = newBasicCatalog() - val sessionCatalog = new SessionCatalog(externalCatalog) - val newName = "funcky" - assert(sessionCatalog.getFunction( - FunctionIdentifier("func1", Some("db2"))) == newFunc("func1", Some("db2"))) - assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func1")) - sessionCatalog.renameFunction( - FunctionIdentifier("func1", Some("db2")), FunctionIdentifier(newName, Some("db2"))) - assert(sessionCatalog.getFunction( - FunctionIdentifier(newName, Some("db2"))) == newFunc(newName, Some("db2"))) - assert(externalCatalog.listFunctions("db2", "*").toSet == Set(newName)) - // Rename function without explicitly specifying database - sessionCatalog.setCurrentDatabase("db2") - sessionCatalog.renameFunction(FunctionIdentifier(newName), FunctionIdentifier("func1")) - assert(sessionCatalog.getFunction( - FunctionIdentifier("func1")) == newFunc("func1", Some("db2"))) - 
assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func1")) - // Renaming "db2.func1" to "db1.func2" should fail because databases don't match - intercept[AnalysisException] { - sessionCatalog.renameFunction( - FunctionIdentifier("func1", Some("db2")), FunctionIdentifier("func2", Some("db1"))) - } - } - - test("rename function when database/function does not exist") { - val catalog = new SessionCatalog(newBasicCatalog()) - intercept[AnalysisException] { - catalog.renameFunction( - FunctionIdentifier("func1", Some("does_not_exist")), - FunctionIdentifier("func5", Some("does_not_exist"))) - } - intercept[AnalysisException] { - catalog.renameFunction( - FunctionIdentifier("does_not_exist", Some("db2")), - FunctionIdentifier("x", Some("db2"))) - } - } - - test("rename temp function") { - val externalCatalog = newBasicCatalog() - val sessionCatalog = new SessionCatalog(externalCatalog) - val info = new ExpressionInfo("tempFunc", "func1") - val tempFunc = (e: Seq[Expression]) => e.head - sessionCatalog.createTempFunction("func1", info, tempFunc, ignoreIfExists = false) - sessionCatalog.setCurrentDatabase("db2") - // If a database is specified, we'll always rename the function in that database - sessionCatalog.renameFunction( - FunctionIdentifier("func1", Some("db2")), FunctionIdentifier("func3", Some("db2"))) - assert(sessionCatalog.getTempFunction("func1") == Some(tempFunc)) - assert(sessionCatalog.getTempFunction("func3") == None) - assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func3")) - // If no database is specified, we'll first rename temporary functions - sessionCatalog.createFunction(newFunc("func1", Some("db2"))) - sessionCatalog.renameFunction(FunctionIdentifier("func1"), FunctionIdentifier("func4")) - assert(sessionCatalog.getTempFunction("func4") == Some(tempFunc)) - assert(sessionCatalog.getTempFunction("func1") == None) - assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func1", "func3")) - // Then, if no such temporary function exist, rename the function in the current database - sessionCatalog.renameFunction(FunctionIdentifier("func1"), FunctionIdentifier("func5")) - assert(sessionCatalog.getTempFunction("func5") == None) - assert(externalCatalog.listFunctions("db2", "*").toSet == Set("func3", "func5")) - } - - test("alter function") { - val catalog = new SessionCatalog(newBasicCatalog()) - assert(catalog.getFunction(FunctionIdentifier("func1", Some("db2"))).className == funcClass) - catalog.alterFunction(newFunc("func1", Some("db2")).copy(className = "muhaha")) - assert(catalog.getFunction(FunctionIdentifier("func1", Some("db2"))).className == "muhaha") - // Alter function without explicitly specifying database - catalog.setCurrentDatabase("db2") - catalog.alterFunction(newFunc("func1").copy(className = "derpy")) - assert(catalog.getFunction(FunctionIdentifier("func1")).className == "derpy") - } - - test("alter function when database/function does not exist") { - val catalog = new SessionCatalog(newBasicCatalog()) - intercept[AnalysisException] { - catalog.alterFunction(newFunc("func5", Some("does_not_exist"))) - } - intercept[AnalysisException] { - catalog.alterFunction(newFunc("funcky", Some("db2"))) - } - } - test("list functions") { val catalog = new SessionCatalog(newBasicCatalog()) val info1 = new ExpressionInfo("tempFunc1", "func1") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala index 0059cab80ee90..95d9a96a03ca5 
100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionState.scala @@ -103,4 +103,5 @@ private[hive] class HiveSessionState(ctx: HiveContext) extends SessionState(ctx) } } } + } diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala index 5b3f84de61c05..890783cf99d65 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala @@ -28,7 +28,6 @@ trait TestHiveSingleton extends SparkFunSuite with BeforeAndAfterAll { protected val hiveContext: TestHiveContext = TestHive protected val hiveSqlParser = hiveContext.sessionState.sqlParser - protected override def afterAll(): Unit = { try { hiveContext.reset() From ac6dfc5c94a07128b64003852f74105fbb37bd83 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 11:58:43 -0700 Subject: [PATCH 26/38] Remove withGenerator. --- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 ++------------ .../spark/sql/hive/execution/HiveSqlParser.scala | 10 ---------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c350f3049f152..85c13f4da0f4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -532,8 +532,8 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { Explode(expressions.head) case "json_tuple" => JsonTuple(expressions) - case other => - withGenerator(other, expressions, ctx) + case name => + UnresolvedGenerator(name, expressions) } Generate( @@ -545,16 +545,6 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging { query) } - /** - * Create a [[Generator]]. Override this method in order to support custom Generators. - */ - protected def withGenerator( - name: String, - expressions: Seq[Expression], - ctx: LateralViewContext): Generator = { - throw new ParseException(s"Generator function '$name' is not supported", ctx) - } - /** * Create a joins between two or more logical plans. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 8efa0466a415f..8d90e235a5134 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -265,16 +265,6 @@ class HiveSqlAstBuilder extends SparkSqlAstBuilder { CreateView(tableDesc, plan(query), allowExist, replace, command(ctx)) } - /** - * Create a [[Generator]]. Override this method in order to support custom Generators. - */ - override protected def withGenerator( - name: String, - expressions: Seq[Expression], - ctx: LateralViewContext): Generator = { - UnresolvedGenerator(name, expressions) - } - /** * Create a [[HiveScriptIOSchema]]. 
*/ From 64e2ee3eabb514b80941a4db886633f857d4355b Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 14:12:45 -0700 Subject: [PATCH 27/38] Cleanup --- .../sql/catalyst/catalog/SessionCatalog.scala | 27 ++++++++++++++----- .../catalyst/catalog/functionResources.scala | 9 +++++-- .../sql/catalyst/catalog/interface.scala | 3 ++- .../org/apache/spark/sql/SQLContext.scala | 1 + .../spark/sql/execution/SparkSqlParser.scala | 2 +- .../spark/sql/execution/command/ddl.scala | 7 +++-- .../execution/HiveCompatibilitySuite.scala | 1 - .../spark/sql/hive/HiveSessionCatalog.scala | 4 +-- .../sql/hive/execution/HiveQuerySuite.scala | 4 +-- .../sql/hive/execution/SQLQuerySuite.scala | 2 +- 10 files changed, 41 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 12518ba896eb4..eb0fb02954fd5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -24,7 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException, SimpleFunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.{NoSuchFunctionException, FunctionRegistry, SimpleFunctionRegistry} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} @@ -48,7 +48,7 @@ class SessionCatalog( externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry, conf: CatalystConf) { - this(externalCatalog, new DummyFunctionResourceLoader, functionRegistry, conf) + this(externalCatalog, DummyFunctionResourceLoader, functionRegistry, conf) } // For testing only. @@ -480,7 +480,6 @@ class SessionCatalog( * Construct a [[FunctionBuilder]] based on the provided class that represents a function. * * This performs reflection to decide what type of [[Expression]] to return in the builder. - * This is useful for creating temporary functions. */ private[sql] def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { // TODO: at least support UDAFs here @@ -488,7 +487,7 @@ class SessionCatalog( } /** - * Loads resources such as JARs and Files to SQLContext. + * Loads resources such as JARs and Files for a function. */ def loadFunctionResources(resources: Seq[(String, String)]): Unit = { resources.foreach { @@ -527,6 +526,12 @@ class SessionCatalog( } } + protected def failFunctionLookup(name: String): Nothing = { + throw new AnalysisException(s"Undefined function: $name. This function is " + + s"neither a registered temporary function nor " + + s"a permanent function registered in the database $currentDb.") + } + /** * Return an [[Expression]] that represents the specified function, assuming it exists. * Note: This is currently only used for temporary functions. @@ -537,14 +542,22 @@ class SessionCatalog( // TODO: Right now, we assume that name is not qualified! 
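    // (Resolution order in the body that follows: first the bare name in the
    // FunctionRegistry, then the current-database-qualified name; only when
    // neither is registered is the function fetched from the external catalog,
    // with failFunctionLookup raised if it is missing there as well, after which
    // its declared resources are loaded and an ExpressionInfo is built for the
    // qualified name so later lookups can be served from the registry.)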
val qualifiedName = FunctionIdentifier(name, Some(currentDb)).unquotedString if (functionRegistry.functionExists(name)) { + // This function has been already loaded into the function registry. functionRegistry.lookupFunction(name, children) } else if (functionRegistry.functionExists(qualifiedName)) { + // This function has been already loaded into the function registry. + // Unlike the above block, we find this function by using the qualified name. functionRegistry.lookupFunction(qualifiedName, children) } else { // The function has not been loaded to the function registry, which means - // that the function is a permanent function. - // We need to first put the function in FunctionRegistry. - val catalogFunction = externalCatalog.getFunction(currentDb, name) + // that the function is a permanent function (if it actually has been registered + // in the metastore). We need to first put the function in FunctionRegistry. + val catalogFunction = try { + externalCatalog.getFunction(currentDb, name) + } catch { + case e: AnalysisException => failFunctionLookup(name) + case e: NoSuchFunctionException => failFunctionLookup(name) + } assert(qualifiedName == catalogFunction.identifier.unquotedString) loadFunctionResources(catalogFunction.resources) val info = new ExpressionInfo(catalogFunction.className, qualifiedName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala index eb4ec2b31b894..5adcc892cf682 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala @@ -26,7 +26,7 @@ object JarResource extends FunctionResourceType object FileResource extends FunctionResourceType -// We do nto allow users to specify a archive because it is YARN specific. +// We do not allow users to specify a archive because it is YARN specific. // When loading resources, we will throw an exception and ask users to // use --archive with spark submit. object ArchiveResource extends FunctionResourceType @@ -45,11 +45,16 @@ object FunctionResourceType { case class FunctionResource(resourceType: FunctionResourceType, uri: String) +/** + * A simple trait representing a class that can be used to load resources used by + * a function. Because only a SQLContext can load resources, we create this trait + * to avoid of explicitly passing SQLContext around. + */ trait FunctionResourceLoader { def loadResource(resource: FunctionResource): Unit } -class DummyFunctionResourceLoader extends FunctionResourceLoader { +object DummyFunctionResourceLoader extends FunctionResourceLoader { override def loadResource(resource: FunctionResource): Unit = { throw new UnsupportedOperationException } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index cf57bc27f1a21..2675be533e389 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -146,7 +146,7 @@ abstract class ExternalCatalog { def createFunction(db: String, funcDefinition: CatalogFunction): Unit - // TODO: dropFunction should also take a FunctionIdentifier. + // TODO: dropFunction should take a FunctionIdentifier. 
def dropFunction(db: String, funcName: String): Unit def renameFunction(db: String, oldName: String, newName: String): Unit @@ -175,6 +175,7 @@ abstract class ExternalCatalog { * @param className fully qualified class name, e.g. "org.apache.spark.util.MyFunc" * @param resources resource types and Uris used by the function */ +// TODO: Use FunctionResource instead of (String, String) as the element type of resources. case class CatalogFunction( identifier: FunctionIdentifier, className: String, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index b811aeadf1820..8875720ed0203 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -208,6 +208,7 @@ class SQLContext private[sql]( sparkContext.addJar(path) } + /** Returns a [[FunctionResourceLoader]] that can be used in SessionCatalog. */ protected[sql] def functionResourceLoader: FunctionResourceLoader = { new FunctionResourceLoader { override def loadResource(resource: FunctionResource): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 729cf53250281..70ac5f1552d5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -314,7 +314,7 @@ class SparkSqlAstBuilder extends AstBuilder { CreateFunction( database, function, - string(ctx.className), // TODO this is not an alias. + string(ctx.className), resources, ctx.TEMPORARY != null) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 06722c17e2a80..c9bcd7d248f1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -198,9 +198,10 @@ case class CreateFunction( if (databaseName.isDefined) { throw new AnalysisException( s"It is not allowed to provide database name when defining a temporary function. " + - s"However, database name ${databaseName.get} is provided..") + s"However, database name ${databaseName.get} is provided.") } - // We can overwrite the temp function definition. + // We first load resources and then put the builder in the function registry. + // Please note that it is allowed to overwrite an existing temp function. sqlContext.sessionState.catalog.loadFunctionResources(resources) val info = new ExpressionInfo(className, functionName) val builder = @@ -208,6 +209,8 @@ case class CreateFunction( sqlContext.sessionState.catalog.createTempFunction( functionName, info, builder, ignoreIfExists = false) } else { + // We are creating a permanent function. First, we want to check if this function + // has already been created. // Check if the function to create is already existing. If so, throw exception. 
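      // (Contrast with the temporary branch above, which loads the declared
      // resources and puts a builder straight into the FunctionRegistry: the
      // permanent path below only records the function's metadata, deferring
      // resource loading and builder creation to the first lookup handled by
      // SessionCatalog.lookupFunction in this same patch.)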
if (sqlContext.sessionState.catalog.functionExists(func)) { val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 05917ccd5bb2a..d08b86d60236f 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -366,7 +366,6 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "alter_index", // We do not support macro for now. - // TODO: throw an exception if a user is trying to create a macro. "macro" ) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 952459243015f..69970bd84bff2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -207,12 +207,12 @@ private[sql] class HiveSessionCatalog( val functionInfo = { try { Option(HiveFunctionRegistry.getFunctionInfo(functionName)).getOrElse( - throw new AnalysisException(s"Undefined Hive UDF: $name")) + failFunctionLookup(name)) } catch { // If HiveFunctionRegistry.getFunctionInfo throws an exception, // we are failing to load a Hive builtin function, which means that // the given function is not a Hive builtin function. - case NonFatal(e) => throw new AnalysisException(s"Undefined Hive UDF: $name") + case NonFatal(e) => failFunctionLookup(name) } } val className = functionInfo.getFunctionClass.getName diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 9c3afc226093f..88cd0db2f6e4d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -1230,7 +1230,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { val e = intercept[AnalysisException] { range(1).selectExpr("not_a_udf()") } - assert(e.getMessage.contains("Undefined Hive UDF")) + assert(e.getMessage.contains("Undefined function")) assert(e.getMessage.contains("not_a_udf")) var success = false val t = new Thread("test") { @@ -1238,7 +1238,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { val e = intercept[AnalysisException] { range(1).selectExpr("not_a_udf()") } - assert(e.getMessage.contains("Undefined Hive UDF")) + assert(e.getMessage.contains("Undefined function")) assert(e.getMessage.contains("not_a_udf")) success = true } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 94ee7dfef0ddc..b97ffc9080797 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -111,7 +111,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { val errMsg = intercept[AnalysisException] { sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS 
cc") }.getMessage - assert(errMsg.contains("Undefined Hive UDF")) + assert(errMsg.contains("Undefined function")) assert(errMsg.contains("udtf_count_temp")) } } From 776c09afd8f448c78b063de510b14434764d72cd Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 14:20:45 -0700 Subject: [PATCH 28/38] style --- .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index eb0fb02954fd5..8178fb2c6a64d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -24,7 +24,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{NoSuchFunctionException, FunctionRegistry, SimpleFunctionRegistry} +import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException, SimpleFunctionRegistry} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} From 18d604247c560959107193838ee425e419ba9af9 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 16:27:25 -0700 Subject: [PATCH 29/38] Update tests. --- .../scala/org/apache/spark/sql/UDFSuite.scala | 3 +- .../apache/spark/sql/test/SQLTestUtils.scala | 19 +++ .../spark/sql/hive/HiveSparkSubmitSuite.scala | 109 ++++++++++++++++++ .../sql/hive/execution/SQLQuerySuite.scala | 13 +-- 4 files changed, 132 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 1cfd048ec7278..ec950332c5f63 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -83,7 +83,8 @@ class UDFSuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException] { df.selectExpr("a_function_that_does_not_exist()") } - assert(e.getMessage.contains("Function not found")) + assert(e.getMessage.contains("Undefined function")) + assert(e.getMessage.contains("a_function_that_does_not_exist")) } test("Simple UDF") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 80a85a6615974..accf08c9b4d8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.test import java.io.File import java.util.UUID +import org.apache.spark.sql.catalyst.FunctionIdentifier + import scala.language.implicitConversions import scala.util.Try @@ -131,6 +133,23 @@ private[sql] trait SQLTestUtils try f(dir) finally Utils.deleteRecursively(dir) } + /** + * Drops functions after calling `f`. A function is represented by (functionName, isTemporary). 
+ */ + protected def withTempFunction(functions: (String, Boolean)*)(f: => Unit): Unit = { + try f finally { + // If the test failed part way, we don't want to mask the failure by failing to remove + // temp tables that never got created. + try functions.foreach { case (functionName, isTemporary) => + val withTemporary = if (isTemporary) "TEMPORARY" + sqlContext.sql(s"DROP $withTemporary FUNCTION $functionName") + assert( + !sqlContext.sessionState.catalog.functionExists(FunctionIdentifier(functionName)), + s"Function $functionName should have been dropped. But, it still exists.") + } + } + } + /** * Drops temporary table `tableName` after calling `f`. */ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 53dec6348fdac..830652d6a7b57 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -31,6 +31,8 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.internal.Logging +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.{QueryTest, Row, SQLContext} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} @@ -55,6 +57,40 @@ class HiveSparkSubmitSuite System.setProperty("spark.testing", "true") } + test("permanent Hive UDF: define a UDF and use it") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) + val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) + val jarsString = Seq(jar1, jar2).map(j => j.toString).mkString(",") + val args = Seq( + "--class", PermanentHiveUDFTest1.getClass.getName.stripSuffix("$"), + "--name", "PermanentHiveUDFTest1", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--driver-java-options", "-Dderby.system.durability=test", + "--jars", jarsString, + unusedJar.toString, "SparkSubmitClassA", "SparkSubmitClassB") + runSparkSubmit(args) + } + + test("permanent Hive UDF: use a already defined permanent function") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) + val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) + val jarsString = Seq(jar1, jar2).map(j => j.toString).mkString(",") + val args = Seq( + "--class", PermanentHiveUDFTest2.getClass.getName.stripSuffix("$"), + "--name", "PermanentHiveUDFTest2", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--driver-java-options", "-Dderby.system.durability=test", + "--jars", jarsString, + unusedJar.toString, "SparkSubmitClassA", "SparkSubmitClassB") + runSparkSubmit(args) + } + test("SPARK-8368: includes jars passed in through --jars") { val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) @@ -208,6 +244,79 @@ class HiveSparkSubmitSuite } } +// This application is used to test defining a new Hive UDF (with an associated jar) +// and use this UDF. 
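// (End to end, the application below exercises the new permanent-function path:
// CREATE FUNCTION ... USING JAR records the metadata, the first use of
// example_max in a query triggers loading of the jar and creation of the
// builder, and DROP FUNCTION removes it again; like the companion test further
// down, it is run through spark-submit in its own JVM so the jar can really be
// loaded.)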
+object PermanentHiveUDFTest1 extends Logging { + def main(args: Array[String]) { + Utils.configTestLog4j("INFO") + val conf = new SparkConf() + conf.set("spark.ui.enabled", "false") + val sc = new SparkContext(conf) + val hiveContext = new TestHiveContext(sc) + + // Load a Hive UDF from the jar. + logInfo("Registering a permanent Hive UDF provided in a jar.") + val jar = TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + hiveContext.sql( + s""" + |CREATE FUNCTION example_max + |AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax' + |USING JAR '$jar' + """.stripMargin) + val source = + hiveContext.createDataFrame((1 to 10).map(i => (i, s"str$i"))).toDF("key", "val") + source.registerTempTable("sourceTable") + // Actually use the loaded UDF. + logInfo("Using the UDF.") + val result = hiveContext.sql( + "SELECT example_max(key) as key, val FROM sourceTable GROUP BY val") + logInfo("Running a simple query on the table.") + val count = result.orderBy("key", "val").count() + if (count != 10) { + throw new Exception(s"Result table should have 10 rows instead of $count rows") + } + hiveContext.sql("DROP FUNCTION example_max") + logInfo("Test finishes.") + sc.stop() + } +} + +// This application is used to test that a pre-defined permanent function with a jar +// resources can be used. We need to run this test in separate JVM to make sure we +// can load the jar defined with the function. +object PermanentHiveUDFTest2 extends Logging { + def main(args: Array[String]) { + Utils.configTestLog4j("INFO") + val conf = new SparkConf() + conf.set("spark.ui.enabled", "false") + val sc = new SparkContext(conf) + val hiveContext = new TestHiveContext(sc) + // Load a Hive UDF from the jar. + logInfo("Write the metadata of a permanent Hive UDF into metastore.") + val jar = TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val function = CatalogFunction( + FunctionIdentifier("example_max"), + "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax", + ("JAR" -> jar) :: Nil) + hiveContext.sessionState.catalog.createFunction(function) + val source = + hiveContext.createDataFrame((1 to 10).map(i => (i, s"str$i"))).toDF("key", "val") + source.registerTempTable("sourceTable") + // Actually use the loaded UDF. + logInfo("Using the UDF.") + val result = hiveContext.sql( + "SELECT example_max(key) as key, val FROM sourceTable GROUP BY val") + logInfo("Running a simple query on the table.") + val count = result.orderBy("key", "val").count() + if (count != 10) { + throw new Exception(s"Result table should have 10 rows instead of $count rows") + } + hiveContext.sql("DROP FUNCTION example_max") + logInfo("Test finishes.") + sc.stop() + } +} + // This object is used for testing SPARK-8368: https://issues.apache.org/jira/browse/SPARK-8368. // We test if we can load user jars in both driver and executors when HiveContext is used. 
object SparkSubmitClassLoaderTest extends Logging { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index b97ffc9080797..65f55b8893f24 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -68,7 +68,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import hiveContext.implicits._ test("UDTF") { - try { + withTempFunction("udtf_count2" -> true) { sql(s"ADD JAR ${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}") // The function source code can be found at: // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF @@ -85,13 +85,11 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer( sql("SELECT udtf_count2(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), Row(3) :: Row(3) :: Nil) - } finally { - sql("DROP TEMPORARY FUNCTION udtf_count2") } } test("permanent UDTF") { - try { + withTempFunction("udtf_count_temp" -> false) { sql( s""" |CREATE FUNCTION udtf_count_temp @@ -106,13 +104,6 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer( sql("SELECT udtf_count_temp(a) FROM (SELECT 1 AS a FROM src LIMIT 3) t"), Row(3) :: Row(3) :: Nil) - } finally { - sql("DROP FUNCTION udtf_count_temp") - val errMsg = intercept[AnalysisException] { - sql("SELECT key, cc FROM src LATERAL VIEW udtf_count_temp(value) dd AS cc") - }.getMessage - assert(errMsg.contains("Undefined function")) - assert(errMsg.contains("udtf_count_temp")) } } From f72e6a9d49960f49b96638053cf13708cb3cbc30 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 22:20:08 -0700 Subject: [PATCH 30/38] Show functions should use SessionCatalog to get all functions. --- .../sql/execution/command/commands.scala | 22 +++++++++---------- .../org/apache/spark/sql/SQLQuerySuite.scala | 8 +++++-- .../sql/hive/execution/SQLQuerySuite.scala | 8 +++---- 3 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 4eb8d7ff0d9f9..ed8a60dc4aa29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -396,18 +396,16 @@ case class ShowFunctions(db: Option[String], pattern: Option[String]) extends Ru schema.toAttributes } - override def run(sqlContext: SQLContext): Seq[Row] = pattern match { - case Some(p) => - try { - val regex = java.util.regex.Pattern.compile(p) - sqlContext.sessionState.functionRegistry.listFunction() - .filter(regex.matcher(_).matches()).map(Row(_)) - } catch { - // probably will failed in the regex that user provided, then returns empty row. 
- case _: Throwable => Seq.empty[Row] - } - case None => - sqlContext.sessionState.functionRegistry.listFunction().map(Row(_)) + override def run(sqlContext: SQLContext): Seq[Row] = { + val dbName = db.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + val functionNames = + sqlContext.sessionState.catalog + .listFunctions(dbName, pattern.getOrElse(".*")) + .map(_.unquotedString) + // We use distinct at here because SessionCatalog's listFunctions will return + // duplicated entries for UDFs that have already been loaded into the + // FunctionRegistry. + functionNames.distinct.sorted.map(Row(_)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b727e88668370..5a851b47caf87 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -61,8 +61,12 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { .filter(regex.matcher(_).matches()).map(Row(_)) } checkAnswer(sql("SHOW functions"), getFunctions(".*")) - Seq("^c.*", ".*e$", "log.*", ".*date.*").foreach { pattern => - checkAnswer(sql(s"SHOW FUNCTIONS '$pattern'"), getFunctions(pattern)) + Seq("^c*", "*e$", "log*", "*date*").foreach { pattern => + // For the pattern part, only '*' and '|' are allowed as wildcards. + // For '*', we need to replace it to '.*'. + checkAnswer( + sql(s"SHOW FUNCTIONS '$pattern'"), + getFunctions(pattern.replaceAll("\\*", ".*"))) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 65f55b8893f24..8d8d02483d59f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -68,7 +68,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import hiveContext.implicits._ test("UDTF") { - withTempFunction("udtf_count2" -> true) { + withUserDefinedFunction("udtf_count2" -> true) { sql(s"ADD JAR ${hiveContext.getHiveFile("TestUDTF.jar").getCanonicalPath()}") // The function source code can be found at: // https://cwiki.apache.org/confluence/display/Hive/DeveloperGuide+UDTF @@ -89,7 +89,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } test("permanent UDTF") { - withTempFunction("udtf_count_temp" -> false) { + withUserDefinedFunction("udtf_count_temp" -> false) { sql( s""" |CREATE FUNCTION udtf_count_temp @@ -206,9 +206,9 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { // TODO: Re-enable this test after we fix SPARK-14335. // checkAnswer(sql("SHOW functions `~`"), Row("~")) checkAnswer(sql("SHOW functions `a function doens't exist`"), Nil) - checkAnswer(sql("SHOW functions `weekofyea.*`"), Row("weekofyear")) + checkAnswer(sql("SHOW functions `weekofyea*`"), Row("weekofyear")) // this probably will failed if we add more function with `sha` prefixing. - checkAnswer(sql("SHOW functions `sha.*`"), Row("sha") :: Row("sha1") :: Row("sha2") :: Nil) + checkAnswer(sql("SHOW functions `sha*`"), Row("sha") :: Row("sha1") :: Row("sha2") :: Nil) } test("describe functions") { From 5fcf6bc5ac5a359d2deb66b49ae8f136078e0e35 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 22:21:28 -0700 Subject: [PATCH 31/38] Add tests. 
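The commands below resolve an unqualified function name against the session's current
database before creating, dropping, or looking up the function. As a rough standalone
sketch of that rule (FunctionId and the helper below are illustrative stand-ins, not
Spark's actual classes):

case class FunctionId(funcName: String, database: Option[String]) {
  def unquotedString: String = database.map(db => s"$db.$funcName").getOrElse(funcName)
}

object QualifyFunctionName {
  // currentDb stands in for the session's current database.
  def qualify(name: String, db: Option[String], currentDb: String): FunctionId =
    FunctionId(name, Some(db.getOrElse(currentDb)))

  def main(args: Array[String]): Unit = {
    // Unqualified name: the current database is filled in.
    println(qualify("myUpper", None, currentDb = "default").unquotedString)         // default.myUpper
    // Qualified name: the database given by the user wins.
    println(qualify("myUpper", Some("mydb"), currentDb = "default").unquotedString)  // mydb.myUpper
  }
}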
--- .../sql/catalyst/catalog/SessionCatalog.scala | 12 +- .../spark/sql/execution/command/ddl.scala | 18 +- .../apache/spark/sql/test/SQLTestUtils.scala | 15 +- .../spark/sql/hive/HiveExternalCatalog.scala | 7 +- .../spark/sql/hive/HiveSessionCatalog.scala | 1 + .../spark/sql/hive/HiveSparkSubmitSuite.scala | 66 +++++++- .../org/apache/spark/sql/hive/UDFSuite.scala | 158 +++++++++++++++++- 7 files changed, 253 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 8178fb2c6a64d..a7d88a69e1829 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -558,8 +558,12 @@ class SessionCatalog( case e: AnalysisException => failFunctionLookup(name) case e: NoSuchFunctionException => failFunctionLookup(name) } - assert(qualifiedName == catalogFunction.identifier.unquotedString) loadFunctionResources(catalogFunction.resources) + // Please note that qualifiedName is provided by the user. However, + // catalogFunction.identifier.unquotedString is returned by the underlying + // catalog. So, it is possible that qualifiedName is not exactly the same as + // catalogFunction.identifier.unquotedString (difference is on case-sensitivity). + // At here, we preserve the input from the user. val info = new ExpressionInfo(catalogFunction.className, qualifiedName) val builder = makeFunctionBuilder(qualifiedName, catalogFunction.className) createTempFunction(qualifiedName, info, builder, ignoreIfExists = false) @@ -575,9 +579,11 @@ class SessionCatalog( val dbFunctions = externalCatalog.listFunctions(db, pattern).map { f => FunctionIdentifier(f, Some(db)) } val regex = pattern.replaceAll("\\*", ".*").r - val _tempFunctions = functionRegistry.listFunction() + val loadedFunctions = functionRegistry.listFunction() .filter { f => regex.pattern.matcher(f).matches() } .map { f => FunctionIdentifier(f) } - dbFunctions ++ _tempFunctions + // TODO: Actually, there will be dbFunctions that have been loaded into FunctionRegistry. + // So, the returned list may have two entries for the same function. + dbFunctions ++ loadedFunctions } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index c9bcd7d248f1d..c86eb6bedf508 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -192,8 +192,6 @@ case class CreateFunction( extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { - val func = FunctionIdentifier(functionName, databaseName) - val catalogFunc = CatalogFunction(func, className, resources) if (isTemp) { if (databaseName.isDefined) { throw new AnalysisException( @@ -209,11 +207,13 @@ case class CreateFunction( sqlContext.sessionState.catalog.createTempFunction( functionName, info, builder, ignoreIfExists = false) } else { + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + val func = FunctionIdentifier(functionName, Some(dbName)) + val catalogFunc = CatalogFunction(func, className, resources) // We are creating a permanent function. First, we want to check if this function // has already been created. 
// Check if the function to create is already existing. If so, throw exception. if (sqlContext.sessionState.catalog.functionExists(func)) { - val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) throw new AnalysisException( s"Function '$functionName' already exists in database '$dbName'.") } @@ -238,14 +238,18 @@ case class DropFunction( override def run(sqlContext: SQLContext): Seq[Row] = { if (isTemp) { - require(databaseName.isEmpty, - "attempted to drop a temporary function while specifying a database") + if (databaseName.isDefined) { + throw new AnalysisException( + s"It is not allowed to provide database name when dropping a temporary function. " + + s"However, database name ${databaseName.get} is provided.") + } sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) } else { - val func = FunctionIdentifier(functionName, databaseName) + // We are dropping a permanent. + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + val func = FunctionIdentifier(functionName, Some(dbName)) if (!ifExists) { if (!sqlContext.sessionState.catalog.functionExists(func)) { - val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) throw new AnalysisException( s"Function '$functionName' does not exist in database '$dbName'.") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index accf08c9b4d8e..7844d1b296597 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.test import java.io.File import java.util.UUID -import org.apache.spark.sql.catalyst.FunctionIdentifier - import scala.language.implicitConversions import scala.util.Try @@ -31,6 +29,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.NoSuchTableException +import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.execution.Filter @@ -136,13 +135,17 @@ private[sql] trait SQLTestUtils /** * Drops functions after calling `f`. A function is represented by (functionName, isTemporary). */ - protected def withTempFunction(functions: (String, Boolean)*)(f: => Unit): Unit = { - try f finally { + protected def withUserDefinedFunction(functions: (String, Boolean)*)(f: => Unit): Unit = { + try { + f + } catch { + case cause: Throwable => throw cause + } finally { // If the test failed part way, we don't want to mask the failure by failing to remove // temp tables that never got created. try functions.foreach { case (functionName, isTemporary) => - val withTemporary = if (isTemporary) "TEMPORARY" - sqlContext.sql(s"DROP $withTemporary FUNCTION $functionName") + val withTemporary = if (isTemporary) "TEMPORARY" else "" + sqlContext.sql(s"DROP $withTemporary FUNCTION IF EXISTS $functionName") assert( !sqlContext.sessionState.catalog.functionExists(FunctionIdentifier(functionName)), s"Function $functionName should have been dropped. 
But, it still exists.") diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 11205ae67c3f7..7176a98604eef 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -272,7 +272,12 @@ private[spark] class HiveExternalCatalog(client: HiveClient) extends ExternalCat override def createFunction( db: String, funcDefinition: CatalogFunction): Unit = withClient { - client.createFunction(db, funcDefinition) + // Hive's metastore is case insensitive. However, Hive's createFunction does + // not normalize the function name (unlike the getFunction part). So, + // we are normalizing the function name. + val functionName = funcDefinition.identifier.funcName.toLowerCase + val functionIdentifier = funcDefinition.identifier.copy(funcName = functionName) + client.createFunction(db, funcDefinition.copy(identifier = functionIdentifier)) } override def dropFunction(db: String, name: String): Unit = withClient { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index 69970bd84bff2..f36e2fe26e197 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -199,6 +199,7 @@ private[sql] class HiveSessionCatalog( } else { // This function is not in functionRegistry, let's try to load it as a Hive's // built-in function. + // Hive is case insensitive. val functionName = name.toLowerCase // TODO: This may not really work for current_user because current_user is not evaluated // with session info. 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 830652d6a7b57..dd2129375d3de 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -31,9 +31,9 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogFunction import org.apache.spark.sql.{QueryTest, Row, SQLContext} +import org.apache.spark.sql.catalyst.catalog.CatalogFunction +import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer @@ -57,6 +57,23 @@ class HiveSparkSubmitSuite System.setProperty("spark.testing", "true") } + test("temporary Hive UDF: define a UDF and use it") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) + val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) + val jarsString = Seq(jar1, jar2).map(j => j.toString).mkString(",") + val args = Seq( + "--class", TemporaryHiveUDFTest.getClass.getName.stripSuffix("$"), + "--name", "TemporaryHiveUDFTest", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--driver-java-options", "-Dderby.system.durability=test", + "--jars", jarsString, + unusedJar.toString, "SparkSubmitClassA", "SparkSubmitClassB") + runSparkSubmit(args) + } + test("permanent Hive UDF: define a UDF and use it") { val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) @@ -245,7 +262,46 @@ class HiveSparkSubmitSuite } // This application is used to test defining a new Hive UDF (with an associated jar) -// and use this UDF. +// and use this UDF. We need to run this test in separate JVM to make sure we +// can load the jar defined with the function. +object TemporaryHiveUDFTest extends Logging { + def main(args: Array[String]) { + Utils.configTestLog4j("INFO") + val conf = new SparkConf() + conf.set("spark.ui.enabled", "false") + val sc = new SparkContext(conf) + val hiveContext = new TestHiveContext(sc) + + // Load a Hive UDF from the jar. + logInfo("Registering a temporary Hive UDF provided in a jar.") + val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + hiveContext.sql( + s""" + |CREATE TEMPORARY FUNCTION example_max + |AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax' + |USING JAR '$jar' + """.stripMargin) + val source = + hiveContext.createDataFrame((1 to 10).map(i => (i, s"str$i"))).toDF("key", "val") + source.registerTempTable("sourceTable") + // Actually use the loaded UDF. 
+ logInfo("Using the UDF.") + val result = hiveContext.sql( + "SELECT example_max(key) as key, val FROM sourceTable GROUP BY val") + logInfo("Running a simple query on the table.") + val count = result.orderBy("key", "val").count() + if (count != 10) { + throw new Exception(s"Result table should have 10 rows instead of $count rows") + } + hiveContext.sql("DROP temporary FUNCTION example_max") + logInfo("Test finishes.") + sc.stop() + } +} + +// This application is used to test defining a new Hive UDF (with an associated jar) +// and use this UDF. We need to run this test in separate JVM to make sure we +// can load the jar defined with the function. object PermanentHiveUDFTest1 extends Logging { def main(args: Array[String]) { Utils.configTestLog4j("INFO") @@ -256,7 +312,7 @@ object PermanentHiveUDFTest1 extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a permanent Hive UDF provided in a jar.") - val jar = TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath hiveContext.sql( s""" |CREATE FUNCTION example_max @@ -293,7 +349,7 @@ object PermanentHiveUDFTest2 extends Logging { val hiveContext = new TestHiveContext(sc) // Load a Hive UDF from the jar. logInfo("Write the metadata of a permanent Hive UDF into metastore.") - val jar = TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath + val jar = hiveContext.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath val function = CatalogFunction( FunctionIdentifier("example_max"), "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala index 3ab4576811194..44cd2b4934606 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala @@ -17,12 +17,48 @@ package org.apache.spark.sql.hive -import org.apache.spark.sql.QueryTest +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils case class FunctionResult(f1: String, f2: String) -class UDFSuite extends QueryTest with TestHiveSingleton { +/** + * A test suite for UDF related functionalities. Because Hive metastore is + * case insensitive, database names and function names have both upper case + * letters and lower case letters. 
+ */ +class UDFSuite + extends QueryTest + with SQLTestUtils + with TestHiveSingleton + with BeforeAndAfterEach { + + import hiveContext.implicits._ + + private[this] val functionName = "myUpper" + private[this] val functionClass = + classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper].getCanonicalName + + private var testDF: DataFrame = null + private[this] val testTableName = "testDF_UDFSuite" + private var expectedDF: DataFrame = null + + override def beforeAll(): Unit = { + sql("USE default") + + testDF = (1 to 10).map(i => s"sTr$i").toDF("value") + testDF.registerTempTable(testTableName) + expectedDF = (1 to 10).map(i => s"STR$i").toDF("value") + super.beforeAll() + } + + override def afterEach(): Unit = { + sql("USE default") + super.afterEach() + } test("UDF case insensitive") { hiveContext.udf.register("random0", () => { Math.random() }) @@ -32,4 +68,122 @@ class UDFSuite extends QueryTest with TestHiveSingleton { assert(hiveContext.sql("SELECT RANDOm1() FROM src LIMIT 1").head().getDouble(0) >= 0.0) assert(hiveContext.sql("SELECT strlenscala('test', 1) FROM src LIMIT 1").head().getInt(0) === 5) } + + test("temporary function: create and drop") { + withUserDefinedFunction(functionName -> true) { + intercept[AnalysisException] { + sql(s"CREATE TEMPORARY FUNCTION default.$functionName AS '$functionClass'") + } + sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$functionClass'") + checkAnswer( + sql(s"SELECT myupper(value) from $testTableName"), + expectedDF + ) + intercept[AnalysisException] { + sql(s"DROP TEMPORARY FUNCTION default.$functionName") + } + } + } + + test("permanent function: create and drop without specifying db name") { + withUserDefinedFunction(functionName -> false) { + sql(s"CREATE FUNCTION $functionName AS '$functionClass'") + checkAnswer( + sql("SHOW functions like '.*upper'"), + Row("default.myupper") + ) + checkAnswer( + sql(s"SELECT myupper(value) from $testTableName"), + expectedDF + ) + assert(sql("SHOW functions").collect().map(_.getString(0)).contains("default.myupper")) + } + } + + test("permanent function: create and drop with a db name") { + // For this block, drop function command uses functionName as the function name. + withUserDefinedFunction(functionName.toUpperCase -> false) { + sql(s"CREATE FUNCTION default.$functionName AS '$functionClass'") + // TODO: Re-enable it after can distinguish qualified and unqualified function name + // in SessionCatalog.lookupFunction. + // checkAnswer( + // sql(s"SELECT default.myuPPer(value) from $testTableName"), + // expectedDF + // ) + checkAnswer( + sql(s"SELECT myuppER(value) from $testTableName"), + expectedDF + ) + checkAnswer( + sql(s"SELECT default.MYupper(value) from $testTableName"), + expectedDF + ) + } + + // For this block, drop function command uses default.functionName as the function name. + withUserDefinedFunction(s"DEfault.$functionName" -> false) { + sql(s"CREATE FUNCTION dEFault.$functionName AS '$functionClass'") + checkAnswer( + sql(s"SELECT myUpper(value) from $testTableName"), + expectedDF + ) + } + } + + test("permanent function: create and drop a function in another db") { + // For this block, drop function command uses functionName as the function name. 
+ withTempDatabase { dbName => + withUserDefinedFunction(functionName -> false) { + sql(s"CREATE FUNCTION $dbName.$functionName AS '$functionClass'") + // TODO: Re-enable it after can distinguish qualified and unqualified function name + // checkAnswer( + // sql(s"SELECT $dbName.myuPPer(value) from $testTableName"), + // expectedDF + // ) + + checkAnswer( + sql(s"SHOW FUNCTIONS like $dbName.myupper"), + Row(s"$dbName.myupper") + ) + + sql(s"USE $dbName") + + checkAnswer( + sql(s"SELECT myuppER(value) from $testTableName"), + expectedDF + ) + + sql(s"USE default") + + checkAnswer( + sql(s"SELECT $dbName.MYupper(value) from $testTableName"), + expectedDF + ) + + sql(s"USE $dbName") + } + + sql(s"USE default") + + // For this block, drop function command uses default.functionName as the function name. + withUserDefinedFunction(s"$dbName.$functionName" -> false) { + sql(s"CREATE FUNCTION $dbName.$functionName AS '$functionClass'") + // TODO: Re-enable it after can distinguish qualified and unqualified function name + // checkAnswer( + // sql(s"SELECT $dbName.myupper(value) from $testTableName"), + // expectedDF + // ) + + sql(s"USE $dbName") + + assert(sql("SHOW functions").collect().map(_.getString(0)).contains(s"$dbName.myupper")) + checkAnswer( + sql(s"SELECT myupper(value) from $testTableName"), + expectedDF + ) + + sql(s"USE default") + } + } + } } From 0572acce35f93961ac5f56bd215c736da00d4aea Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Sun, 3 Apr 2016 23:13:26 -0700 Subject: [PATCH 32/38] Move CreateFunction and DropFunction to functions.scala --- .../spark/sql/execution/command/ddl.scala | 93 +-------------- .../sql/execution/command/functions.scala | 109 ++++++++++++++++++ 2 files changed, 113 insertions(+), 89 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index c86eb6bedf508..68968819104e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -18,11 +18,11 @@ package org.apache.spark.sql.execution.command import org.apache.spark.internal.Logging -import org.apache.spark.sql.{AnalysisException, Row, SQLContext} -import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogFunction} +import org.apache.spark.sql.{Row, SQLContext} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase import org.apache.spark.sql.catalyst.catalog.ExternalCatalog.TablePartitionSpec -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExpressionInfo} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.datasources.BucketSpec import org.apache.spark.sql.types._ @@ -175,91 +175,6 @@ case class DescribeDatabase( } } -/** - * The DDL command that creates a function. - * alias: the class name that implements the created function. - * resources: Jars, files, or archives which need to be added to the environment when the function - * is referenced for the first time by a session. - * isTemp: indicates if it is a temporary function. - */ -// TODO: Use Seq[FunctionResource] instead of Seq[(String, String)] for resources. 
-case class CreateFunction( - databaseName: Option[String], - functionName: String, - className: String, - resources: Seq[(String, String)], - isTemp: Boolean) - extends RunnableCommand { - - override def run(sqlContext: SQLContext): Seq[Row] = { - if (isTemp) { - if (databaseName.isDefined) { - throw new AnalysisException( - s"It is not allowed to provide database name when defining a temporary function. " + - s"However, database name ${databaseName.get} is provided.") - } - // We first load resources and then put the builder in the function registry. - // Please note that it is allowed to overwrite an existing temp function. - sqlContext.sessionState.catalog.loadFunctionResources(resources) - val info = new ExpressionInfo(className, functionName) - val builder = - sqlContext.sessionState.catalog.makeFunctionBuilder(functionName, className) - sqlContext.sessionState.catalog.createTempFunction( - functionName, info, builder, ignoreIfExists = false) - } else { - val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) - val func = FunctionIdentifier(functionName, Some(dbName)) - val catalogFunc = CatalogFunction(func, className, resources) - // We are creating a permanent function. First, we want to check if this function - // has already been created. - // Check if the function to create is already existing. If so, throw exception. - if (sqlContext.sessionState.catalog.functionExists(func)) { - throw new AnalysisException( - s"Function '$functionName' already exists in database '$dbName'.") - } - // This function will be loaded into the FunctionRegistry when a query uses it. - sqlContext.sessionState.catalog.createFunction(catalogFunc) - } - Seq.empty[Row] - } -} - -/** - * The DDL command that drops a function. - * ifExists: returns an error if the function doesn't exist, unless this is true. - * isTemp: indicates if it is a temporary function. - */ -case class DropFunction( - databaseName: Option[String], - functionName: String, - ifExists: Boolean, - isTemp: Boolean) - extends RunnableCommand { - - override def run(sqlContext: SQLContext): Seq[Row] = { - if (isTemp) { - if (databaseName.isDefined) { - throw new AnalysisException( - s"It is not allowed to provide database name when dropping a temporary function. " + - s"However, database name ${databaseName.get} is provided.") - } - sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) - } else { - // We are dropping a permanent. - val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) - val func = FunctionIdentifier(functionName, Some(dbName)) - if (!ifExists) { - if (!sqlContext.sessionState.catalog.functionExists(func)) { - throw new AnalysisException( - s"Function '$functionName' does not exist in database '$dbName'.") - } - } - sqlContext.sessionState.catalog.dropFunction(func) - } - Seq.empty[Row] - } -} - /** Rename in ALTER TABLE/VIEW: change the name of a table/view to a different name. */ case class AlterTableRename( oldName: TableIdentifier, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala new file mode 100644 index 0000000000000..efdc7498231f8 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{AnalysisException, Row, SQLContext} +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogFunction +import org.apache.spark.sql.catalyst.expressions.ExpressionInfo + + +/** + * The DDL command that creates a function. + * alias: the class name that implements the created function. + * resources: Jars, files, or archives which need to be added to the environment when the function + * is referenced for the first time by a session. + * isTemp: indicates if it is a temporary function. + */ +// TODO: Use Seq[FunctionResource] instead of Seq[(String, String)] for resources. +case class CreateFunction( + databaseName: Option[String], + functionName: String, + className: String, + resources: Seq[(String, String)], + isTemp: Boolean) + extends RunnableCommand { + + override def run(sqlContext: SQLContext): Seq[Row] = { + if (isTemp) { + if (databaseName.isDefined) { + throw new AnalysisException( + s"It is not allowed to provide database name when defining a temporary function. " + + s"However, database name ${databaseName.get} is provided.") + } + // We first load resources and then put the builder in the function registry. + // Please note that it is allowed to overwrite an existing temp function. + sqlContext.sessionState.catalog.loadFunctionResources(resources) + val info = new ExpressionInfo(className, functionName) + val builder = + sqlContext.sessionState.catalog.makeFunctionBuilder(functionName, className) + sqlContext.sessionState.catalog.createTempFunction( + functionName, info, builder, ignoreIfExists = false) + } else { + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + val func = FunctionIdentifier(functionName, Some(dbName)) + val catalogFunc = CatalogFunction(func, className, resources) + // We are creating a permanent function. First, we want to check if this function + // has already been created. + // Check if the function to create is already existing. If so, throw exception. + if (sqlContext.sessionState.catalog.functionExists(func)) { + throw new AnalysisException( + s"Function '$functionName' already exists in database '$dbName'.") + } + // This function will be loaded into the FunctionRegistry when a query uses it. + sqlContext.sessionState.catalog.createFunction(catalogFunc) + } + Seq.empty[Row] + } +} + +/** + * The DDL command that drops a function. + * ifExists: returns an error if the function doesn't exist, unless this is true. + * isTemp: indicates if it is a temporary function. 
+ */ +case class DropFunction( + databaseName: Option[String], + functionName: String, + ifExists: Boolean, + isTemp: Boolean) + extends RunnableCommand { + + override def run(sqlContext: SQLContext): Seq[Row] = { + if (isTemp) { + if (databaseName.isDefined) { + throw new AnalysisException( + s"It is not allowed to provide database name when dropping a temporary function. " + + s"However, database name ${databaseName.get} is provided.") + } + sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) + } else { + // We are dropping a permanent. + val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + val func = FunctionIdentifier(functionName, Some(dbName)) + if (!ifExists) { + if (!sqlContext.sessionState.catalog.functionExists(func)) { + throw new AnalysisException( + s"Function '$functionName' does not exist in database '$dbName'.") + } + } + sqlContext.sessionState.catalog.dropFunction(func) + } + Seq.empty[Row] + } +} From a16395a343e024cee8c641ee1ccd23161be1d447 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 4 Apr 2016 08:19:37 -0700 Subject: [PATCH 33/38] Fix test --- .../sql/catalyst/parser/PlanParserSuite.scala | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 23f05ce84667c..0786b36a33449 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.analysis.UnresolvedGenerator import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -306,10 +307,18 @@ class PlanParserSuite extends PlanTest { .insertInto("t2"), from.where('s < 10).select(star()).insertInto("t3"))) - // Unsupported generator. - intercept( + // Unresolved generator. + val expected = table("t") + .generate( + UnresolvedGenerator("posexplode", Seq('x)), + join = true, + outer = false, + Some("posexpl"), + Seq("x", "y")) + .select(star()) + assertEqual( "select * from t lateral view posexplode(x) posexpl as x, y", - "Generator function 'posexplode' is not supported") + expected) } test("joins") { From 1f779736c3c7f0b778fab4739fb7a1b4fd909d6a Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 4 Apr 2016 11:57:46 -0700 Subject: [PATCH 34/38] Update test and comments --- .../org/apache/spark/sql/execution/command/commands.scala | 6 +++++- .../org/apache/spark/sql/hive/HiveSessionCatalog.scala | 7 +++++++ .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 4 ++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index ed8a60dc4aa29..965e6ba3a131e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -384,8 +384,12 @@ case class ShowDatabasesCommand(databasePattern: Option[String]) extends Runnabl * A command for users to list all of the registered functions. 
* The syntax of using this command in SQL is: * {{{ - * SHOW FUNCTIONS + * SHOW FUNCTIONS [LIKE pattern] * }}} + * For the pattern, '*' matches any sequence of characters (including no characters) and + * '|' is for alternation. + * For example, "show functions like 'yea*|windo*'" will return "window" and "year". + * * TODO currently we are simply ignore the db */ case class ShowFunctions(db: Option[String], pattern: Option[String]) extends RunnableCommand { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index f36e2fe26e197..a76e651ffaf5a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -188,6 +188,13 @@ private[sql] class HiveSessionCatalog( // xpath_double, xpath_float, xpath_int, xpath_long, xpath_number, // xpath_short, and xpath_string. override def lookupFunction(name: String, children: Seq[Expression]): Expression = { + // TODO: Once lookupFunction accepts a TableIdentifier, we should refactor this method to + // if (super.functionExists(name)) { + // super.lookupFunction(name, children) + // } else { + // // This function is a Hive builtin function. + // ... + // } Try(super.lookupFunction(name, children)) match { case Success(expr) => expr case Failure(error) => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 8d8d02483d59f..cb1a3c02813c9 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -209,6 +209,10 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { checkAnswer(sql("SHOW functions `weekofyea*`"), Row("weekofyear")) // this probably will failed if we add more function with `sha` prefixing. checkAnswer(sql("SHOW functions `sha*`"), Row("sha") :: Row("sha1") :: Row("sha2") :: Nil) + // Test '|' for alternation. + checkAnswer( + sql("SHOW functions 'sha*|weekofyea*'"), + Row("sha") :: Row("sha1") :: Row("sha2") :: Row("weekofyear") :: Nil) } test("describe functions") { From 88fd93c54cf8b6e70edab25526be93cd91e2563a Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 4 Apr 2016 19:10:59 -0700 Subject: [PATCH 35/38] Address comments. 
--- .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../catalyst/analysis/FunctionRegistry.scala | 12 ++---- .../catalyst/catalog/InMemoryCatalog.scala | 5 --- .../sql/catalyst/catalog/SessionCatalog.scala | 36 +++++++++++------ .../sql/catalyst/catalog/interface.scala | 11 ------ .../catalyst/catalog/CatalogTestCases.scala | 15 ------- .../org/apache/spark/sql/SQLContext.scala | 4 +- .../sql/execution/command/commands.scala | 9 +++-- .../sql/execution/command/functions.scala | 39 +++++++++++-------- .../spark/sql/hive/HiveExternalCatalog.scala | 4 -- .../spark/sql/hive/HiveSessionCatalog.scala | 2 +- .../sql/hive/execution/HiveSqlParser.scala | 2 +- 12 files changed, 60 insertions(+), 83 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index be2f780bc117a..dad78fcfcac0a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -47,9 +47,7 @@ object SimpleAnalyzer new SimpleCatalystConf(caseSensitiveAnalysis = true)) class SimpleAnalyzer(functionRegistry: FunctionRegistry, conf: CatalystConf) - extends Analyzer( - new SessionCatalog(new InMemoryCatalog, functionRegistry, conf), - conf) + extends Analyzer(new SessionCatalog(new InMemoryCatalog, functionRegistry, conf), conf) /** * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 28f50dae36844..7af5ffbe4740e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -52,7 +52,7 @@ trait FunctionRegistry { /** Drop a function and return whether the function existed. */ def dropFunction(name: String): Boolean - /** Checks if a function with a given name exits. */ + /** Checks if a function with a given name exists. */ def functionExists(name: String): Boolean = lookupFunction(name).isDefined } @@ -343,14 +343,9 @@ object FunctionRegistry { /** See usage above. */ def expression[T <: Expression](name: String) (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { - expression(name, tag.runtimeClass.asInstanceOf[Class[T]]) - } - def expression[T <: Expression]( - name: String, - clazz: Class[T]): (String, (ExpressionInfo, FunctionBuilder)) = { // See if we can find a constructor that accepts Seq[Expression] - val varargCtor = Try(clazz.getDeclaredConstructor(classOf[Seq[_]])).toOption + val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption val builder = (expressions: Seq[Expression]) => { if (varargCtor.isDefined) { // If there is an apply method that accepts Seq[Expression], use that one. @@ -361,7 +356,7 @@ object FunctionRegistry { } else { // Otherwise, find an ctor method that matches the number of arguments, and use that. 
val params = Seq.fill(expressions.size)(classOf[Expression]) - val f = Try(clazz.getDeclaredConstructor(params : _*)) match { + val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match { case Success(e) => e case Failure(e) => @@ -374,6 +369,7 @@ object FunctionRegistry { } } + val clazz = tag.runtimeClass val df = clazz.getAnnotation(classOf[ExpressionDescription]) if (df != null) { (name, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index 2bbb970ec92e6..2af0107fa37a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -315,11 +315,6 @@ class InMemoryCatalog extends ExternalCatalog { catalog(db).functions.put(newName, newFunc) } - override def alterFunction(db: String, funcDefinition: CatalogFunction): Unit = synchronized { - requireFunctionExists(db, funcDefinition.identifier.funcName) - catalog(db).functions.put(funcDefinition.identifier.funcName, funcDefinition) - } - override def getFunction(db: String, funcName: String): CatalogFunction = synchronized { requireFunctionExists(db, funcName) catalog(db).functions(funcName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index a7d88a69e1829..d7eaa11812e0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -434,8 +434,10 @@ class SessionCatalog( val db = name.database.getOrElse(currentDb) val qualified = name.copy(database = Some(db)).unquotedString if (functionRegistry.functionExists(qualified)) { - // If we have loaded this function into FunctionRegistry, + // If we have loaded this function into the FunctionRegistry, // also drop it from there. + // For a permanent function, because we loaded it to the FunctionRegistry + // when it's first used, we also need to drop it from the FunctionRegistry. functionRegistry.dropFunction(qualified) } externalCatalog.dropFunction(db, name.funcName) @@ -454,8 +456,7 @@ class SessionCatalog( } /** - * Check if a function is already existing. - * + * Check if the specified function exists. */ def functionExists(name: FunctionIdentifier): Boolean = { if (functionRegistry.functionExists(name.unquotedString)) { @@ -464,6 +465,8 @@ class SessionCatalog( } else { // Need to check if this function exists in the metastore. try { + // TODO: It's better to ask external catalog if this function exists. + // So, we can avoid of having this hacky try/catch block. getFunction(name) != null } catch { case _: NoSuchFunctionException => false @@ -487,14 +490,14 @@ class SessionCatalog( } /** - * Loads resources such as JARs and Files for a function. + * Loads resources such as JARs and Files for a function. Every resource is represented + * by a tuple (resource type, resource uri). 
*/ def loadFunctionResources(resources: Seq[(String, String)]): Unit = { - resources.foreach { - case (resourceType, uri) => - val functionResource = - FunctionResource(FunctionResourceType.fromString(resourceType.toLowerCase), uri) - functionResourceLoader.loadResource(functionResource) + resources.foreach { case (resourceType, uri) => + val functionResource = + FunctionResource(FunctionResourceType.fromString(resourceType.toLowerCase), uri) + functionResourceLoader.loadResource(functionResource) } } @@ -534,7 +537,16 @@ class SessionCatalog( /** * Return an [[Expression]] that represents the specified function, assuming it exists. - * Note: This is currently only used for temporary functions. + * + * For a temporary function or a permanent function that has been loaded, + * this method will simply lookup the function through the + * FunctionRegistry and create an expression based on the builder. + * + * For a permanent function that has not been loaded, we will first fetch its metadata + * from the underlying external catalog. Then, we will load all resources associated + * with this function (i.e. jars and files). Finally, we create a function builder + * based on the function class and put the builder into the FunctionRegistry. + * The name of this function in the FunctionRegistry will be `databaseName.functionName`. */ def lookupFunction(name: String, children: Seq[Expression]): Expression = { // TODO: Right now, the name can be qualified or not qualified. @@ -551,7 +563,7 @@ class SessionCatalog( } else { // The function has not been loaded to the function registry, which means // that the function is a permanent function (if it actually has been registered - // in the metastore). We need to first put the function in FunctionRegistry. + // in the metastore). We need to first put the function in the FunctionRegistry. val catalogFunction = try { externalCatalog.getFunction(currentDb, name) } catch { @@ -582,7 +594,7 @@ class SessionCatalog( val loadedFunctions = functionRegistry.listFunction() .filter { f => regex.pattern.matcher(f).matches() } .map { f => FunctionIdentifier(f) } - // TODO: Actually, there will be dbFunctions that have been loaded into FunctionRegistry. + // TODO: Actually, there will be dbFunctions that have been loaded into the FunctionRegistry. // So, the returned list may have two entries for the same function. dbFunctions ++ loadedFunctions } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 2675be533e389..97b9946140c5a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -146,21 +146,10 @@ abstract class ExternalCatalog { def createFunction(db: String, funcDefinition: CatalogFunction): Unit - // TODO: dropFunction should take a FunctionIdentifier. def dropFunction(db: String, funcName: String): Unit def renameFunction(db: String, oldName: String, newName: String): Unit - /** - * Alter a function whose name that matches the one specified in `funcDefinition`, - * assuming the function exists. - * - * Note: If the underlying implementation does not support altering a certain field, - * this becomes a no-op. - */ - def alterFunction(db: String, funcDefinition: CatalogFunction): Unit - - // TODO: getFunction should take a FunctionIdentifier. 
def getFunction(db: String, funcName: String): CatalogFunction def listFunctions(db: String, pattern: String): Seq[String] diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala index 9d922f5d3bd33..fbcac09ce223f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/CatalogTestCases.scala @@ -465,21 +465,6 @@ abstract class CatalogTestCases extends SparkFunSuite with BeforeAndAfterEach { } } - test("alter function") { - val catalog = newBasicCatalog() - assert(catalog.getFunction("db2", "func1").className == funcClass) - catalog.alterFunction("db2", newFunc("func1").copy(className = "muhaha")) - assert(catalog.getFunction("db2", "func1").className == "muhaha") - intercept[AnalysisException] { catalog.alterFunction("db2", newFunc("funcky")) } - } - - test("alter function when database does not exist") { - val catalog = newBasicCatalog() - intercept[AnalysisException] { - catalog.alterFunction("does_not_exist", newFunc()) - } - } - test("list functions") { val catalog = newBasicCatalog() catalog.createFunction("db2", newFunc("func2")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0d63f435f6ec1..d336fde4f4a96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -208,8 +208,8 @@ class SQLContext private[sql]( sparkContext.addJar(path) } - /** Returns a [[FunctionResourceLoader]] that can be used in SessionCatalog. */ - protected[sql] def functionResourceLoader: FunctionResourceLoader = { + /** A [[FunctionResourceLoader]] that can be used in SessionCatalog. */ + protected[sql] lazy val functionResourceLoader: FunctionResourceLoader = { new FunctionResourceLoader { override def loadResource(resource: FunctionResource): Unit = { resource.resourceType match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 965e6ba3a131e..c18c003ff6911 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -402,13 +402,14 @@ case class ShowFunctions(db: Option[String], pattern: Option[String]) extends Ru override def run(sqlContext: SQLContext): Seq[Row] = { val dbName = db.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + // If pattern is not specified, we use '*', which is used to + // match any sequence of characters (including no characters). val functionNames = sqlContext.sessionState.catalog - .listFunctions(dbName, pattern.getOrElse(".*")) + .listFunctions(dbName, pattern.getOrElse("*")) .map(_.unquotedString) - // We use distinct at here because SessionCatalog's listFunctions will return - // duplicated entries for UDFs that have already been loaded into the - // FunctionRegistry. + // The session catalog caches some persistent functions in the FunctionRegistry + // so there can be duplicates. 
functionNames.distinct.sorted.map(Row(_)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala index efdc7498231f8..66d17e322ed6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/functions.scala @@ -25,10 +25,17 @@ import org.apache.spark.sql.catalyst.expressions.ExpressionInfo /** * The DDL command that creates a function. - * alias: the class name that implements the created function. - * resources: Jars, files, or archives which need to be added to the environment when the function - * is referenced for the first time by a session. - * isTemp: indicates if it is a temporary function. + * To create a temporary function, the syntax of using this command in SQL is: + * {{{ + * CREATE TEMPORARY FUNCTION functionName + * AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']] + * }}} + * + * To create a permanent function, the syntax in SQL is: + * {{{ + * CREATE FUNCTION [databaseName.]functionName + * AS className [USING JAR\FILE 'uri' [, JAR|FILE 'uri']] + * }}} */ // TODO: Use Seq[FunctionResource] instead of Seq[(String, String)] for resources. case class CreateFunction( @@ -55,17 +62,16 @@ case class CreateFunction( sqlContext.sessionState.catalog.createTempFunction( functionName, info, builder, ignoreIfExists = false) } else { + // For a permanent, we will store the metadata into underlying external catalog. + // This function will be loaded into the FunctionRegistry when a query uses it. + // We do not load it into FunctionRegistry right now. val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) val func = FunctionIdentifier(functionName, Some(dbName)) val catalogFunc = CatalogFunction(func, className, resources) - // We are creating a permanent function. First, we want to check if this function - // has already been created. - // Check if the function to create is already existing. If so, throw exception. if (sqlContext.sessionState.catalog.functionExists(func)) { throw new AnalysisException( s"Function '$functionName' already exists in database '$dbName'.") } - // This function will be loaded into the FunctionRegistry when a query uses it. sqlContext.sessionState.catalog.createFunction(catalogFunc) } Seq.empty[Row] @@ -85,24 +91,23 @@ case class DropFunction( extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { + val catalog = sqlContext.sessionState.catalog if (isTemp) { if (databaseName.isDefined) { throw new AnalysisException( s"It is not allowed to provide database name when dropping a temporary function. " + s"However, database name ${databaseName.get} is provided.") } - sqlContext.sessionState.catalog.dropTempFunction(functionName, ifExists) + catalog.dropTempFunction(functionName, ifExists) } else { - // We are dropping a permanent. - val dbName = databaseName.getOrElse(sqlContext.sessionState.catalog.getCurrentDatabase) + // We are dropping a permanent function. 
+ val dbName = databaseName.getOrElse(catalog.getCurrentDatabase) val func = FunctionIdentifier(functionName, Some(dbName)) - if (!ifExists) { - if (!sqlContext.sessionState.catalog.functionExists(func)) { - throw new AnalysisException( - s"Function '$functionName' does not exist in database '$dbName'.") - } + if (!ifExists && !catalog.functionExists(func)) { + throw new AnalysisException( + s"Function '$functionName' does not exist in database '$dbName'.") } - sqlContext.sessionState.catalog.dropFunction(func) + catalog.dropFunction(func) } Seq.empty[Row] } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 7176a98604eef..98a5998d03dd1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -288,10 +288,6 @@ private[spark] class HiveExternalCatalog(client: HiveClient) extends ExternalCat client.renameFunction(db, oldName, newName) } - override def alterFunction(db: String, funcDefinition: CatalogFunction): Unit = withClient { - client.alterFunction(db, funcDefinition) - } - override def getFunction(db: String, funcName: String): CatalogFunction = withClient { client.getFunction(db, funcName) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala index a76e651ffaf5a..d315f39a91e23 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionCatalog.scala @@ -188,7 +188,7 @@ private[sql] class HiveSessionCatalog( // xpath_double, xpath_float, xpath_int, xpath_long, xpath_number, // xpath_short, and xpath_string. override def lookupFunction(name: String, children: Seq[Expression]): Expression = { - // TODO: Once lookupFunction accepts a TableIdentifier, we should refactor this method to + // TODO: Once lookupFunction accepts a FunctionIdentifier, we should refactor this method to // if (super.functionExists(name)) { // super.lookupFunction(name, children) // } else { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala index 8d90e235a5134..75b73766201ef 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveSqlParser.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkSqlAstBuilder -import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView, HiveSessionState} +import org.apache.spark.sql.hive.{CreateTableAsSelect => CTAS, CreateViewAsSelect => CreateView} import org.apache.spark.sql.hive.{HiveGenericUDTF, HiveSerDe} import org.apache.spark.sql.hive.HiveShim.HiveFunctionWrapper From 8a41f6df56cede48107e81f50df2111ddaf5e18d Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 4 Apr 2016 20:24:37 -0700 Subject: [PATCH 36/38] Make resource loader a transient lazy val. 
---
 sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index d336fde4f4a96..587ba1ea058a2 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -209,7 +209,7 @@ class SQLContext private[sql](
   }
 
   /** A [[FunctionResourceLoader]] that can be used in SessionCatalog. */
-  protected[sql] lazy val functionResourceLoader: FunctionResourceLoader = {
+  @transient protected[sql] lazy val functionResourceLoader: FunctionResourceLoader = {
     new FunctionResourceLoader {
       override def loadResource(resource: FunctionResource): Unit = {
         resource.resourceType match {

From 361421c02390dd2c0dc00bb4e09a150c37bee54a Mon Sep 17 00:00:00 2001
From: Yin Huai
Date: Mon, 4 Apr 2016 20:43:31 -0700
Subject: [PATCH 37/38] Review comments

---
 .../apache/spark/sql/hive/test/TestHive.scala |  4 +-
 .../org/apache/spark/sql/hive/UDFSuite.scala  | 43 +++++++++++--------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 2ee7c64e69ef0..7f6ca21782da4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -201,8 +201,8 @@ class TestHiveContext private[hive](
   }
 
   override lazy val functionRegistry = {
-    // TestHiveFunctionRegistry tracks removed functions. So, we cannot simply use
-    // org.apache.spark.sql.catalyst.analysis.FunctionRegistry.builtIn.copy.
+    // We use TestHiveFunctionRegistry here to track functions that have been explicitly
+    // unregistered (through the TestHiveFunctionRegistry.unregisterFunction method).
val fr = new TestHiveFunctionRegistry org.apache.spark.sql.catalyst.analysis.FunctionRegistry.expressions.foreach { case (name, (info, builder)) => fr.registerFunction(name, info, builder) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala index 44cd2b4934606..d1aa5aa931947 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/UDFSuite.scala @@ -38,7 +38,10 @@ class UDFSuite import hiveContext.implicits._ - private[this] val functionName = "myUpper" + private[this] val functionName = "myUPper" + private[this] val functionNameUpper = "MYUPPER" + private[this] val functionNameLower = "myupper" + private[this] val functionClass = classOf[org.apache.hadoop.hive.ql.udf.generic.GenericUDFUpper].getCanonicalName @@ -76,7 +79,7 @@ class UDFSuite } sql(s"CREATE TEMPORARY FUNCTION $functionName AS '$functionClass'") checkAnswer( - sql(s"SELECT myupper(value) from $testTableName"), + sql(s"SELECT $functionNameLower(value) from $testTableName"), expectedDF ) intercept[AnalysisException] { @@ -90,19 +93,22 @@ class UDFSuite sql(s"CREATE FUNCTION $functionName AS '$functionClass'") checkAnswer( sql("SHOW functions like '.*upper'"), - Row("default.myupper") + Row(s"default.$functionNameLower") ) checkAnswer( - sql(s"SELECT myupper(value) from $testTableName"), + sql(s"SELECT $functionName(value) from $testTableName"), expectedDF ) - assert(sql("SHOW functions").collect().map(_.getString(0)).contains("default.myupper")) + assert( + sql("SHOW functions").collect() + .map(_.getString(0)) + .contains(s"default.$functionNameLower")) } } test("permanent function: create and drop with a db name") { // For this block, drop function command uses functionName as the function name. - withUserDefinedFunction(functionName.toUpperCase -> false) { + withUserDefinedFunction(functionNameUpper -> false) { sql(s"CREATE FUNCTION default.$functionName AS '$functionClass'") // TODO: Re-enable it after can distinguish qualified and unqualified function name // in SessionCatalog.lookupFunction. @@ -111,20 +117,20 @@ class UDFSuite // expectedDF // ) checkAnswer( - sql(s"SELECT myuppER(value) from $testTableName"), + sql(s"SELECT $functionName(value) from $testTableName"), expectedDF ) checkAnswer( - sql(s"SELECT default.MYupper(value) from $testTableName"), + sql(s"SELECT default.$functionName(value) from $testTableName"), expectedDF ) } // For this block, drop function command uses default.functionName as the function name. 
- withUserDefinedFunction(s"DEfault.$functionName" -> false) { + withUserDefinedFunction(s"DEfault.$functionNameLower" -> false) { sql(s"CREATE FUNCTION dEFault.$functionName AS '$functionClass'") checkAnswer( - sql(s"SELECT myUpper(value) from $testTableName"), + sql(s"SELECT $functionNameUpper(value) from $testTableName"), expectedDF ) } @@ -142,21 +148,21 @@ class UDFSuite // ) checkAnswer( - sql(s"SHOW FUNCTIONS like $dbName.myupper"), - Row(s"$dbName.myupper") + sql(s"SHOW FUNCTIONS like $dbName.$functionNameUpper"), + Row(s"$dbName.$functionNameLower") ) sql(s"USE $dbName") checkAnswer( - sql(s"SELECT myuppER(value) from $testTableName"), + sql(s"SELECT $functionName(value) from $testTableName"), expectedDF ) sql(s"USE default") checkAnswer( - sql(s"SELECT $dbName.MYupper(value) from $testTableName"), + sql(s"SELECT $dbName.$functionName(value) from $testTableName"), expectedDF ) @@ -166,7 +172,7 @@ class UDFSuite sql(s"USE default") // For this block, drop function command uses default.functionName as the function name. - withUserDefinedFunction(s"$dbName.$functionName" -> false) { + withUserDefinedFunction(s"$dbName.$functionNameUpper" -> false) { sql(s"CREATE FUNCTION $dbName.$functionName AS '$functionClass'") // TODO: Re-enable it after can distinguish qualified and unqualified function name // checkAnswer( @@ -176,9 +182,12 @@ class UDFSuite sql(s"USE $dbName") - assert(sql("SHOW functions").collect().map(_.getString(0)).contains(s"$dbName.myupper")) + assert( + sql("SHOW functions").collect() + .map(_.getString(0)) + .contains(s"$dbName.$functionNameLower")) checkAnswer( - sql(s"SELECT myupper(value) from $testTableName"), + sql(s"SELECT $functionNameLower(value) from $testTableName"), expectedDF ) From 39387666fe24a2e65a267eabc15c7816c8738449 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Tue, 5 Apr 2016 07:21:15 -0700 Subject: [PATCH 38/38] Remove unnecessary changes. 
--- .../org/apache/spark/sql/hive/test/TestHiveSingleton.scala | 1 - .../scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala | 3 ++- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala index 890783cf99d65..154ada3daae51 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/TestHiveSingleton.scala @@ -26,7 +26,6 @@ import org.apache.spark.sql.SQLContext trait TestHiveSingleton extends SparkFunSuite with BeforeAndAfterAll { protected val sqlContext: SQLContext = TestHive protected val hiveContext: TestHiveContext = TestHive - protected val hiveSqlParser = hiveContext.sessionState.sqlParser protected override def afterAll(): Unit = { try { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala index a72779d4e3a62..d9664680f4a11 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ErrorPositionSuite.scala @@ -23,6 +23,7 @@ import org.scalatest.BeforeAndAfterEach import org.apache.spark.sql.{AnalysisException, QueryTest} import org.apache.spark.sql.catalyst.util.quietly +import org.apache.spark.sql.hive.execution.HiveSqlParser import org.apache.spark.sql.hive.test.TestHiveSingleton class ErrorPositionSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterEach { @@ -130,7 +131,7 @@ class ErrorPositionSuite extends QueryTest with TestHiveSingleton with BeforeAnd * @param token a unique token in the string that should be indicated by the exception */ def positionTest(name: String, query: String, token: String): Unit = { - def ast = hiveSqlParser.parsePlan(query) + def ast = HiveSqlParser.parsePlan(query) def parseTree = Try(quietly(ast.treeString)).getOrElse("") test(name) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 4538c1afd77e6..05318f51af01e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -31,7 +31,7 @@ class StatisticsSuite extends QueryTest with TestHiveSingleton { test("parse analyze commands") { def assertAnalyzeCommand(analyzeCommand: String, c: Class[_]) { - val parsed = hiveSqlParser.parsePlan(analyzeCommand) + val parsed = HiveSqlParser.parsePlan(analyzeCommand) val operators = parsed.collect { case a: AnalyzeTable => a case o => o