Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[SPARK-48345][SQL] Check variable declarations #47404

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions common/utils/src/main/resources/error/error-conditions.json
Original file line number Diff line number Diff line change
Expand Up @@ -2941,6 +2941,24 @@
],
"sqlState" : "22029"
},
"INVALID_VARIABLE_DECLARATION" : {
"message" : [
"Invalid variable declaration."
momcilomrk-db marked this conversation as resolved.
Show resolved Hide resolved
],
"subClass" : {
"NOT_ALLOWED_IN_SCOPE" : {
"message" : [
"Variable <varName> was declared on line <lineNumber>, which is not allowed in this scope."
]
},
"ONLY_AT_BEGINNING" : {
"message" : [
"Variable <varName> can only be declared at the beginning of the compound, but it was declared on line <lineNumber>."
]
}
},
"sqlState" : "42K0M"
},
"INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : {
"message" : [
"Variable type must be string type but got <varType>."
Expand Down
6 changes: 6 additions & 0 deletions common/utils/src/main/resources/error/error-states.json
Original file line number Diff line number Diff line change
Expand Up @@ -4619,6 +4619,12 @@
"standard": "N",
"usedBy": ["Spark"]
},
"42K0M": {
"description": "Invalid variable declaration.",
"origin": "Spark",
"standard": "N",
"usedBy": ["Spark"]
},
"42KD0": {
"description": "Ambiguous name reference.",
"origin": "Databricks",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, con
import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog}
import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition
import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform}
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
import org.apache.spark.sql.errors.DataTypeErrors.toSQLStmt
import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors, QueryParsingErrors, SqlScriptingErrors}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.LEGACY_BANG_EQUALS_NOT
import org.apache.spark.sql.types._
Expand All @@ -62,7 +61,8 @@ import org.apache.spark.util.random.RandomSampler
* The AstBuilder converts an ANTLR4 ParseTree into a catalyst Expression, LogicalPlan or
* TableIdentifier.
*/
class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
class AstBuilder extends DataTypeAstBuilder with SQLConfHelper
with Logging with DataTypeErrorsBase {
import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._
import ParserUtils._

Expand Down Expand Up @@ -133,12 +133,42 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {

private def visitCompoundBodyImpl(
ctx: CompoundBodyContext,
label: Option[String]): CompoundBody = {
label: Option[String],
allowVarDeclare: Boolean): CompoundBody = {
val buff = ListBuffer[CompoundPlanStatement]()
ctx.compoundStatements.forEach(compoundStatement => {
buff += visit(compoundStatement).asInstanceOf[CompoundPlanStatement]
})

val compoundStatements = buff.toList

val candidates = if (allowVarDeclare) {
compoundStatements.dropWhile {
case SingleStatement(_: CreateVariable) => true
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated question: within SQL scripting, shall we only allow to create single-part-name variables? e.g. should CREATE VARIABLE system.session.name be disallowed?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreement with Serge is that we should be able to reference session variables at all times using system.session.<varName>. Also, local variables can be accessed by their name directly, but also using <labelName>.<varName> to allow access to all local variables in case some of them have been shadowed.

Though, what I explained is a resolution part not the creation. For the creation, looking at the code and documentation, we should allow the same thing as well:
image

I will check with Serge once again, just to confirm, but wouldn't block this PR on it because we are preparing a POC for variables anyways which is supposed to handle all local variable related stuff and we will resolve all such cases there, if that's fine by you!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just for my curiosity. If SQL scripting only allows creation of local variable, then we can even fail earlier in the parser if the CREATE VARIABLE uses more than one name part.

case _ => false
}
} else {
compoundStatements
}

val declareVarStatement = candidates.collectFirst {
case SingleStatement(c: CreateVariable) => c
}

declareVarStatement match {
case Some(c: CreateVariable) =>
if (allowVarDeclare) {
throw SqlScriptingErrors.variableDeclarationOnlyAtBeginning(
toSQLId(c.name.asInstanceOf[UnresolvedIdentifier].nameParts),
c.origin.line.get.toString)
} else {
throw SqlScriptingErrors.variableDeclarationNotAllowedInScope(
toSQLId(c.name.asInstanceOf[UnresolvedIdentifier].nameParts),
c.origin.line.get.toString)
}
case _ =>
}

CompoundBody(buff.toSeq, label)
}

Expand All @@ -161,11 +191,11 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
val labelText = beginLabelCtx.
map(_.multipartIdentifier().getText).getOrElse(java.util.UUID.randomUUID.toString).
toLowerCase(Locale.ROOT)
visitCompoundBodyImpl(ctx.compoundBody(), Some(labelText))
visitCompoundBodyImpl(ctx.compoundBody(), Some(labelText), allowVarDeclare = true)
}

override def visitCompoundBody(ctx: CompoundBodyContext): CompoundBody = {
visitCompoundBodyImpl(ctx, None)
visitCompoundBodyImpl(ctx, None, allowVarDeclare = false)
}

override def visitCompoundStatement(ctx: CompoundStatementContext): CompoundPlanStatement =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,18 @@ private[sql] object SqlScriptingErrors extends QueryErrorsBase {
messageParameters = Map("endLabel" -> endLabel))
}

/**
 * Error raised when a variable declaration appears in a scope where
 * declarations are not permitted at all.
 *
 * @param varName quoted (SQL-identifier) name of the declared variable
 * @param lineNumber line of the script on which the declaration appeared
 * @return the exception to be thrown by the parser
 */
def variableDeclarationNotAllowedInScope(varName: String, lineNumber: String): Throwable = {
  // No underlying cause: this is a pure parse-time validation failure.
  val params = Map(
    "varName" -> varName,
    "lineNumber" -> lineNumber)
  new SparkException(
    errorClass = "INVALID_VARIABLE_DECLARATION.NOT_ALLOWED_IN_SCOPE",
    cause = null,
    messageParameters = params)
}

/**
 * Error raised when a variable declaration appears in a compound where
 * declarations are allowed, but not at the beginning of its body.
 *
 * @param varName quoted (SQL-identifier) name of the declared variable
 * @param lineNumber line of the script on which the declaration appeared
 * @return the exception to be thrown by the parser
 */
def variableDeclarationOnlyAtBeginning(varName: String, lineNumber: String): Throwable = {
  // No underlying cause: this is a pure parse-time validation failure.
  val params = Map(
    "varName" -> varName,
    "lineNumber" -> lineNumber)
  new SparkException(
    errorClass = "INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING",
    cause = null,
    messageParameters = params)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.parser

import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.plans.logical.CreateVariable

class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
import CatalystSqlParser._
Expand Down Expand Up @@ -263,6 +264,37 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
assert(tree.label.nonEmpty)
}

test("declare at the beginning") {
  // Both statements of the compound are DECLARE statements placed at the
  // start of the body, so parsing must succeed and yield two CreateVariable
  // plans wrapped in SingleStatement nodes.
  val sqlScriptText =
    """
      |BEGIN
      | DECLARE testVariable1 VARCHAR(50);
      | DECLARE testVariable2 INTEGER;
      |END""".stripMargin
  val tree = parseScript(sqlScriptText)
  assert(tree.collection.length == 2)
  tree.collection.foreach { stmt =>
    assert(stmt.isInstanceOf[SingleStatement])
    assert(stmt.asInstanceOf[SingleStatement].parsedPlan.isInstanceOf[CreateVariable])
  }
}

test("declare after beginning") {
  // A DECLARE statement that follows a non-DECLARE statement (SELECT) must
  // be rejected with INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING, and the
  // error must report the variable name and its script line number.
  val sqlScriptText =
    """
      |BEGIN
      | SELECT 1;
      | DECLARE testVariable INTEGER;
      |END""".stripMargin
  val e = intercept[SparkException] {
    parseScript(sqlScriptText)
  }
  checkError(
    exception = e,
    errorClass = "INVALID_VARIABLE_DECLARATION.ONLY_AT_BEGINNING",
    parameters = Map("varName" -> "`testVariable`", "lineNumber" -> "4"))
}

// TODO Add test for INVALID_VARIABLE_DECLARATION.NOT_ALLOWED_IN_SCOPE exception

test("SET VAR statement test") {
val sqlScriptText =
"""
Expand Down