-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-14123] [SPARK-14384] [SQL] Handle CreateFunction/DropFunction #12117
Changes from all commits
7d00184
9af70af
35ad7ae
cb29f0f
77848c9
b8dda84
ee957db
133ce1a
6b76980
e05b108
314c4db
c370c47
acf9299
2cab41c
65d9dbd
05709f0
67df04f
b67c444
1753cac
a2d588f
51b72dd
66c5261
2aa3725
e343087
8154359
e0570cd
04a5926
979b03e
323cea6
aa41b20
f40a85e
3718e61
ac6dfc5
64e2ee3
ae359fb
776c09a
18d6042
f72e6a9
5fcf6bc
9d39a83
0572acc
a16395a
1f77973
611fe17
88fd93c
8a41f6d
361421c
21ffafc
3938766
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,7 @@ import scala.collection.mutable | |
import org.apache.spark.sql.AnalysisException | ||
import org.apache.spark.sql.catalyst.{CatalystConf, SimpleCatalystConf} | ||
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} | ||
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, SimpleFunctionRegistry} | ||
import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, NoSuchFunctionException, SimpleFunctionRegistry} | ||
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder | ||
import org.apache.spark.sql.catalyst.expressions.{Expression, ExpressionInfo} | ||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} | ||
|
@@ -39,17 +39,21 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} | |
*/ | ||
class SessionCatalog( | ||
externalCatalog: ExternalCatalog, | ||
functionResourceLoader: FunctionResourceLoader, | ||
functionRegistry: FunctionRegistry, | ||
conf: CatalystConf) { | ||
import ExternalCatalog._ | ||
|
||
def this(externalCatalog: ExternalCatalog, functionRegistry: FunctionRegistry) { | ||
this(externalCatalog, functionRegistry, new SimpleCatalystConf(true)) | ||
def this( | ||
externalCatalog: ExternalCatalog, | ||
functionRegistry: FunctionRegistry, | ||
conf: CatalystConf) { | ||
this(externalCatalog, DummyFunctionResourceLoader, functionRegistry, conf) | ||
} | ||
|
||
// For testing only. | ||
def this(externalCatalog: ExternalCatalog) { | ||
this(externalCatalog, new SimpleFunctionRegistry) | ||
this(externalCatalog, new SimpleFunctionRegistry, new SimpleCatalystConf(true)) | ||
} | ||
|
||
protected[this] val tempTables = new mutable.HashMap[String, LogicalPlan] | ||
|
@@ -439,53 +443,88 @@ class SessionCatalog( | |
*/ | ||
def dropFunction(name: FunctionIdentifier): Unit = { | ||
val db = name.database.getOrElse(currentDb) | ||
val qualified = name.copy(database = Some(db)).unquotedString | ||
if (functionRegistry.functionExists(qualified)) { | ||
// If we have loaded this function into the FunctionRegistry, | ||
// also drop it from there. | ||
// For a permanent function, because we load it into the FunctionRegistry | ||
// when it is first used, we also need to drop it from the FunctionRegistry. | ||
functionRegistry.dropFunction(qualified) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't understand why we have to do this. Isn't the function registry only for temporary functions? We only put things in it in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With this PR, Function registry is not just for temp functions. Builders of permanent functions will be lazily loaded. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see... it's because in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
} | ||
externalCatalog.dropFunction(db, name.funcName) | ||
} | ||
|
||
/** | ||
* Alter a metastore function whose name matches the one specified in `funcDefinition`. | ||
* | ||
* If no database is specified in `funcDefinition`, assume the function is in the | ||
* current database. | ||
* | ||
* Note: If the underlying implementation does not support altering a certain field, | ||
* this becomes a no-op. | ||
*/ | ||
def alterFunction(funcDefinition: CatalogFunction): Unit = { | ||
val db = funcDefinition.identifier.database.getOrElse(currentDb) | ||
val newFuncDefinition = funcDefinition.copy( | ||
identifier = FunctionIdentifier(funcDefinition.identifier.funcName, Some(db))) | ||
externalCatalog.alterFunction(db, newFuncDefinition) | ||
} | ||
|
||
/** | ||
* Retrieve the metadata of a metastore function. | ||
* | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Users cannot alter functions (no API exposed). So, I just delete it. |
||
* If a database is specified in `name`, this will return the function in that database. | ||
* If no database is specified, this will return the function in the current database. | ||
*/ | ||
// TODO: have a better name. This method is actually for fetching the metadata of a function. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fetchFunctionMetadata? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd just call it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we change this (which we should do separately) then we should rename There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. Will do it in another PR. |
||
def getFunction(name: FunctionIdentifier): CatalogFunction = { | ||
val db = name.database.getOrElse(currentDb) | ||
externalCatalog.getFunction(db, name.funcName) | ||
} | ||
|
||
/** | ||
* Check if the specified function exists. | ||
*/ | ||
def functionExists(name: FunctionIdentifier): Boolean = { | ||
if (functionRegistry.functionExists(name.unquotedString)) { | ||
// This function exists in the FunctionRegistry. | ||
true | ||
} else { | ||
// Need to check if this function exists in the metastore. | ||
try { | ||
// TODO: It would be better to ask the external catalog whether this function exists, | ||
// so that we can avoid this hacky try/catch block. | ||
getFunction(name) != null | ||
} catch { | ||
case _: NoSuchFunctionException => false | ||
case _: AnalysisException => false // HiveExternalCatalog wraps all exceptions with it. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This use of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think it is inconsistent that getFunction could either return null or throw an exception when it can't find the function. I did this part earlier just to make things work first. Better to refactor it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a TODO. |
||
} | ||
} | ||
} | ||
|
||
// ---------------------------------------------------------------- | ||
// | Methods that interact with temporary and metastore functions | | ||
// ---------------------------------------------------------------- | ||
|
||
/** | ||
* Construct a [[FunctionBuilder]] based on the provided class that represents a function. | ||
* | ||
* This performs reflection to decide what type of [[Expression]] to return in the builder. | ||
*/ | ||
private[sql] def makeFunctionBuilder(name: String, functionClassName: String): FunctionBuilder = { | ||
// TODO: at least support UDAFs here | ||
throw new UnsupportedOperationException("Use sqlContext.udf.register(...) instead.") | ||
} | ||
|
||
/** | ||
* Loads resources such as JARs and Files for a function. Every resource is represented | ||
* by a tuple (resource type, resource uri). | ||
*/ | ||
def loadFunctionResources(resources: Seq[(String, String)]): Unit = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. need to document in the javadoc what the keys and values are. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
resources.foreach { case (resourceType, uri) => | ||
val functionResource = | ||
FunctionResource(FunctionResourceType.fromString(resourceType.toLowerCase), uri) | ||
functionResourceLoader.loadResource(functionResource) | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Users cannot rename a function (no API exposed). So, I just delete it. |
||
|
||
/** | ||
* Create a temporary function. | ||
* This assumes no database is specified in `funcDefinition`. | ||
*/ | ||
def createTempFunction( | ||
name: String, | ||
info: ExpressionInfo, | ||
funcDefinition: FunctionBuilder, | ||
ignoreIfExists: Boolean): Unit = { | ||
if (functionRegistry.lookupFunctionBuilder(name).isDefined && !ignoreIfExists) { | ||
throw new AnalysisException(s"Temporary function '$name' already exists.") | ||
} | ||
functionRegistry.registerFunction(name, funcDefinition) | ||
functionRegistry.registerFunction(name, info, funcDefinition) | ||
} | ||
|
||
/** | ||
|
@@ -501,41 +540,59 @@ class SessionCatalog( | |
} | ||
} | ||
|
||
/** | ||
* Rename a function. | ||
* | ||
* If a database is specified in `oldName`, this will rename the function in that database. | ||
* If no database is specified, this will first attempt to rename a temporary function with | ||
* the same name, then, if that does not exist, rename the function in the current database. | ||
* | ||
* This assumes the database specified in `oldName` matches the one specified in `newName`. | ||
*/ | ||
def renameFunction(oldName: FunctionIdentifier, newName: FunctionIdentifier): Unit = { | ||
if (oldName.database != newName.database) { | ||
throw new AnalysisException("rename does not support moving functions across databases") | ||
} | ||
val db = oldName.database.getOrElse(currentDb) | ||
val oldBuilder = functionRegistry.lookupFunctionBuilder(oldName.funcName) | ||
if (oldName.database.isDefined || oldBuilder.isEmpty) { | ||
externalCatalog.renameFunction(db, oldName.funcName, newName.funcName) | ||
} else { | ||
val oldExpressionInfo = functionRegistry.lookupFunction(oldName.funcName).get | ||
val newExpressionInfo = new ExpressionInfo( | ||
oldExpressionInfo.getClassName, | ||
newName.funcName, | ||
oldExpressionInfo.getUsage, | ||
oldExpressionInfo.getExtended) | ||
functionRegistry.dropFunction(oldName.funcName) | ||
functionRegistry.registerFunction(newName.funcName, newExpressionInfo, oldBuilder.get) | ||
} | ||
protected def failFunctionLookup(name: String): Nothing = { | ||
throw new AnalysisException(s"Undefined function: $name. This function is " + | ||
s"neither a registered temporary function nor " + | ||
s"a permanent function registered in the database $currentDb.") | ||
} | ||
|
||
/** | ||
* Return an [[Expression]] that represents the specified function, assuming it exists. | ||
* Note: This is currently only used for temporary functions. | ||
* | ||
* For a temporary function or a permanent function that has been loaded, | ||
* this method will simply look up the function through the | ||
* FunctionRegistry and create an expression based on the builder. | ||
* | ||
* For a permanent function that has not been loaded, we will first fetch its metadata | ||
* from the underlying external catalog. Then, we will load all resources associated | ||
* with this function (i.e. jars and files). Finally, we create a function builder | ||
* based on the function class and put the builder into the FunctionRegistry. | ||
* The name of this function in the FunctionRegistry will be `databaseName.functionName`. | ||
*/ | ||
def lookupFunction(name: String, children: Seq[Expression]): Expression = { | ||
functionRegistry.lookupFunction(name, children) | ||
// TODO: Right now, the name can be qualified or not qualified. | ||
// It will be better to get a FunctionIdentifier. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I filed https://issues.apache.org/jira/browse/SPARK-14385 for this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
// TODO: Right now, we assume that name is not qualified! | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I will add a test for this (the test will be ignored right now). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added in |
||
val qualifiedName = FunctionIdentifier(name, Some(currentDb)).unquotedString | ||
if (functionRegistry.functionExists(name)) { | ||
// This function has been already loaded into the function registry. | ||
functionRegistry.lookupFunction(name, children) | ||
} else if (functionRegistry.functionExists(qualifiedName)) { | ||
// This function has been already loaded into the function registry. | ||
// Unlike the above block, we find this function by using the qualified name. | ||
functionRegistry.lookupFunction(qualifiedName, children) | ||
} else { | ||
// The function has not been loaded to the function registry, which means | ||
// that the function is a permanent function (if it actually has been registered | ||
// in the metastore). We need to first put the function in the FunctionRegistry. | ||
val catalogFunction = try { | ||
externalCatalog.getFunction(currentDb, name) | ||
} catch { | ||
case e: AnalysisException => failFunctionLookup(name) | ||
case e: NoSuchFunctionException => failFunctionLookup(name) | ||
} | ||
loadFunctionResources(catalogFunction.resources) | ||
// Please note that qualifiedName is provided by the user. However, | ||
// catalogFunction.identifier.unquotedString is returned by the underlying | ||
// catalog. So, it is possible that qualifiedName is not exactly the same as | ||
// catalogFunction.identifier.unquotedString (the two may differ in letter case). | ||
// Here, we preserve the input from the user. | ||
val info = new ExpressionInfo(catalogFunction.className, qualifiedName) | ||
val builder = makeFunctionBuilder(qualifiedName, catalogFunction.className) | ||
createTempFunction(qualifiedName, info, builder, ignoreIfExists = false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the javadoc says nothing about this. We should add a sentence there to say we cache it in the registry. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the doc. |
||
// Now, we need to create the Expression. | ||
functionRegistry.lookupFunction(qualifiedName, children) | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -545,17 +602,11 @@ class SessionCatalog( | |
val dbFunctions = | ||
externalCatalog.listFunctions(db, pattern).map { f => FunctionIdentifier(f, Some(db)) } | ||
val regex = pattern.replaceAll("\\*", ".*").r | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please include my fix for that, or create a utility for all the occurrences. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's do that separately so we can get this patch in first. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I may create another PR to put all the pattern stuff in one place. Right now, it is scattered all over with replaceAll(). Please keep your code as it is for now, I will do it later. Thanks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will keep this part as is. |
||
val _tempFunctions = functionRegistry.listFunction() | ||
val loadedFunctions = functionRegistry.listFunction() | ||
.filter { f => regex.pattern.matcher(f).matches() } | ||
.map { f => FunctionIdentifier(f) } | ||
dbFunctions ++ _tempFunctions | ||
} | ||
|
||
/** | ||
* Return a temporary function. For testing only. | ||
*/ | ||
private[catalog] def getTempFunction(name: String): Option[FunctionBuilder] = { | ||
functionRegistry.lookupFunctionBuilder(name) | ||
// TODO: Actually, there will be dbFunctions that have been loaded into the FunctionRegistry. | ||
// So, the returned list may have two entries for the same function. | ||
dbFunctions ++ loadedFunctions | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yay!