Skip to content

Commit

Permalink
[SPARK-48529][SQL] Introduction of Labels in SQL Scripting
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Previous PRs ([PR1](#46665) and [PR2](#46665)) introduced parser and interpreter changes for SQL Scripting. This PR is a follow-up that introduces the concept of labels to the SQL Scripting language and proposes the following changes:

- Changes the grammar to support labels at the start and end of compound statements.
- Updates the visitor functions for compound nodes of the syntax tree in AstBuilder to check whether labels are present and valid.

More details can be found in the [Jira item](https://issues.apache.org/jira/browse/SPARK-48529) for this task and in its parent item (where the design doc is uploaded as well).

### Why are the changes needed?
The intent is to add support for various SQL scripting concepts like loops, leave & iterate statements.

### Does this PR introduce any user-facing change?
No.
This PR is among the first in a series of PRs that will introduce changes to the sql() API to add support for SQL scripting, but for now, the API remains unchanged.
In the future, the API will remain the same as well, but it will gain the ability to execute SQL scripts.

### How was this patch tested?
There are tests for the newly introduced parser changes:

SqlScriptingParserSuite - unit tests for execution nodes.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #47146 from miland-db/sql_batch_labels.

Lead-authored-by: David Milicevic <david.milicevic@databricks.com>
Co-authored-by: Milan Dankovic <milan.dankovic@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
2 people authored and cloud-fan committed Jul 11, 2024
1 parent 97a0f64 commit 2e3a6fb
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ singleCompoundStatement
;

// Compound block delimited by BEGIN ... END. The second body line additionally allows
// an optional beginLabel before BEGIN and an optional endLabel after END.
// NOTE(review): this listing is a rendered diff without +/- markers, so the first body
// line looks like the pre-change rule and the second like its replacement - confirm
// against the actual grammar file.
beginEndCompoundBlock
: BEGIN compoundBody END
: beginLabel? BEGIN compoundBody END endLabel?
;

compoundBody
Expand All @@ -68,6 +68,14 @@ singleStatement
: statement SEMICOLON* EOF
;

// Label written in front of BEGIN: a multipart identifier immediately followed by a colon.
beginLabel
: multipartIdentifier COLON
;

// Label written after END: a bare multipart identifier, with no trailing colon.
endLabel
: multipartIdentifier
;

singleExpression
: namedExpression EOF
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,28 +122,49 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
visit(s).asInstanceOf[CompoundBody]
}.getOrElse {
val logicalPlan = visitSingleStatement(ctx.singleStatement())
CompoundBody(Seq(SingleStatement(parsedPlan = logicalPlan)))
CompoundBody(Seq(SingleStatement(parsedPlan = logicalPlan)),
Some(java.util.UUID.randomUUID.toString.toLowerCase(Locale.ROOT)))
}
}

/**
 * Visits the BEGIN ... END block wrapped by a single compound statement and returns the
 * result as a CompoundBody.
 */
override def visitSingleCompoundStatement(ctx: SingleCompoundStatementContext): CompoundBody = {
  val blockCtx = ctx.beginEndCompoundBlock()
  val visited = visit(blockCtx)
  visited.asInstanceOf[CompoundBody]
}

/**
 * Visits every compound statement of the given compound body, in iteration order, and
 * collects the results into a CompoundBody carrying the supplied label.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers; the one-parameter
 * signature line and the `CompoundBody(buff.toSeq)` line look like the pre-change versions
 * of the lines that follow them - confirm against the actual file.
 *
 * @param ctx Parse-tree context of the compound body.
 * @param label Label to attach to the resulting CompoundBody, or None for no label.
 * @return CompoundBody holding the visited statements together with `label`.
 */
private def visitCompoundBodyImpl(ctx: CompoundBodyContext): CompoundBody = {
private def visitCompoundBodyImpl(
ctx: CompoundBodyContext,
label: Option[String]): CompoundBody = {
// Accumulates the visited statements; each visit result is cast to CompoundPlanStatement.
val buff = ListBuffer[CompoundPlanStatement]()
ctx.compoundStatements.forEach(compoundStatement => {
buff += visit(compoundStatement).asInstanceOf[CompoundPlanStatement]
})
CompoundBody(buff.toSeq)

CompoundBody(buff.toSeq, label)
}

/**
 * Builds the CompoundBody for a BEGIN ... END block, validating its optional labels first.
 *
 * Validation performed here:
 *  - if both labels are present (and the begin label text is non-empty), they must be equal
 *    ignoring case, otherwise an internal error is thrown;
 *  - an end label without a begin label is rejected with an internal error.
 *
 * The resulting label is the begin label text, or a random UUID string when no begin label
 * is given; in both cases it is lower-cased with Locale.ROOT.
 *
 * @param ctx Parse-tree context of the BEGIN ... END block.
 * @return CompoundBody of the block's statements, labeled as described above.
 */
override def visitBeginEndCompoundBlock(ctx: BeginEndCompoundBlockContext): CompoundBody = {
// NOTE(review): this listing is a rendered diff without +/- markers; the call below looks
// like the removed pre-change line (its result is unused) - confirm against the actual file.
visitCompoundBodyImpl(ctx.compoundBody())
// Option(...) turns an absent (null) label context into None.
val beginLabelCtx = Option(ctx.beginLabel())
val endLabelCtx = Option(ctx.endLabel())

(beginLabelCtx, endLabelCtx) match {
// Both labels given: compare their text case-insensitively.
case (Some(bl: BeginLabelContext), Some(el: EndLabelContext))
if bl.multipartIdentifier().getText.nonEmpty &&
bl.multipartIdentifier().getText.toLowerCase(Locale.ROOT) !=
el.multipartIdentifier().getText.toLowerCase(Locale.ROOT) =>
throw SparkException.internalError("Both labels should be same.")
// End label only: not allowed.
case (None, Some(_)) =>
throw SparkException.internalError("End label can't exist without begin label.")
// Every remaining combination is accepted.
case _ =>
}

// Use the begin label when present, otherwise generate a UUID; lower-case either one.
val labelText = beginLabelCtx.
map(_.multipartIdentifier().getText).getOrElse(java.util.UUID.randomUUID.toString).
toLowerCase(Locale.ROOT)
visitCompoundBodyImpl(ctx.compoundBody(), Some(labelText))
}

/**
 * Builds a CompoundBody for the given compound body context without attaching a label
 * (the label passed on is None).
 *
 * NOTE(review): this listing is a rendered diff without +/- markers; the one-argument call
 * looks like the pre-change version of the two-argument call that follows it - confirm
 * against the actual file.
 */
override def visitCompoundBody(ctx: CompoundBodyContext): CompoundBody = {
visitCompoundBodyImpl(ctx)
visitCompoundBodyImpl(ctx, None)
}

override def visitCompoundStatement(ctx: CompoundStatementContext): CompoundPlanStatement =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,10 @@ case class SingleStatement(parsedPlan: LogicalPlan)
/**
 * Logical operator for a compound body. Contains all statements within the compound body.
 * @param collection Collection of statements within the compound body.
 * @param label Label of the compound body: the label given by the user, or a generated
 *              UUID when the user gave none.
 *              It is None when the CompoundBody is not part of a BeginEndCompoundBlock,
 *              for example when the CompoundBody is inside a loop or a conditional block.
 */
// NOTE(review): this listing is a rendered diff without +/- markers; the one-field
// declaration looks like the pre-change version of the two-field declaration below it -
// confirm against the actual file.
case class CompoundBody(collection: Seq[CompoundPlanStatement]) extends CompoundPlanStatement
case class CompoundBody(
collection: Seq[CompoundPlanStatement],
label: Option[String]) extends CompoundPlanStatement
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.catalyst.plans.SQLHelper

class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
Expand Down Expand Up @@ -161,6 +161,104 @@ class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper {
== "SELECT 3")
}

test("compound: beginLabel") {
  // A block with only a begin label parses, and that label ends up on the compound body.
  val script =
    """
      |lbl: BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END""".stripMargin
  val parsed = parseScript(script)
  val statements = parsed.collection
  assert(statements.length == 5)
  assert(statements.forall(_.isInstanceOf[SingleStatement]))
  assert(parsed.label.contains("lbl"))
}

test("compound: beginLabel + endLabel") {
  // Matching begin and end labels parse, and the label ends up on the compound body.
  val script =
    """
      |lbl: BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END lbl""".stripMargin
  val parsed = parseScript(script)
  val statements = parsed.collection
  assert(statements.length == 5)
  assert(statements.forall(_.isInstanceOf[SingleStatement]))
  assert(parsed.label.contains("lbl"))
}

test("compound: beginLabel + endLabel with different values") {
  // Begin and end labels that differ must be rejected with an internal error.
  val script =
    """
      |lbl_begin: BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END lbl_end""".stripMargin
  val thrown = intercept[SparkException](parseScript(script))
  assert(thrown.getErrorClass === "INTERNAL_ERROR")
  assert(thrown.getMessage.contains("Both labels should be same."))
}

test("compound: endLabel") {
  // An end label without a begin label must be rejected with an internal error.
  val script =
    """
      |BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END lbl""".stripMargin
  val thrown = intercept[SparkException](parseScript(script))
  assert(thrown.getErrorClass === "INTERNAL_ERROR")
  assert(thrown.getMessage.contains("End label can't exist without begin label."))
}

test("compound: beginLabel + endLabel with different casing") {
  // Labels that differ only in case are treated as equal; the stored label is lower-case.
  val script =
    """
      |LBL: BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END lbl""".stripMargin
  val parsed = parseScript(script)
  val statements = parsed.collection
  assert(statements.length == 5)
  assert(statements.forall(_.isInstanceOf[SingleStatement]))
  assert(parsed.label.contains("lbl"))
}

test("compound: no labels provided") {
  // Without any user label the compound body still carries some label.
  val script =
    """
      |BEGIN
      | SELECT 1;
      | SELECT 2;
      | INSERT INTO A VALUES (a, b, 3);
      | SELECT a, b, c FROM T;
      | SELECT * FROM T;
      |END""".stripMargin
  val parsed = parseScript(script)
  val statements = parsed.collection
  assert(statements.length == 5)
  assert(statements.forall(_.isInstanceOf[SingleStatement]))
  assert(parsed.label.nonEmpty)
}

// Helper methods
def cleanupStatementString(statementStr: String): String = {
statementStr
Expand Down

0 comments on commit 2e3a6fb

Please sign in to comment.