Skip to content

Commit

Permalink
[SPARK-48990][SQL] Unified variable related SQL syntax keywords
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
The pr aims to unified `variable` related `SQL syntax` keywords, enable syntax `DECLARE (OR REPLACE)? ...` and `DROP TEMPORARY ...` to support keyword: `VAR` (not only `VARIABLE`).

### Why are the changes needed?
When `setting` variables, we support `(VARIABLE | VAR)`, but when `declaring` and `dropping` variables, we only support the keyword `VARIABLE` (not support the keyword `VAR`)

<img width="597" alt="image" src="https://github.com/user-attachments/assets/07084fef-4080-4410-a74c-e6001ae0a8fa">

https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L68-L72

https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L218-L220

The syntax seems `a bit weird`, `inconsistent experience` in SQL syntax related to variable usage by end-users, so I propose to `unify` it.

### Does this PR introduce _any_ user-facing change?
Yes, enable end-users to use `variable related SQL` with `consistent` keywords.

### How was this patch tested?
Updated existed UT.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#47469 from panbingkun/SPARK-48990.

Authored-by: panbingkun <panbingkun@baidu.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
panbingkun authored and attilapiros committed Oct 4, 2024
1 parent 8c4df5c commit 75adc39
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 14 deletions.
2 changes: 1 addition & 1 deletion docs/sql-ref-syntax-ddl-declare-variable.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ column default expressions, and generated column expressions.
### Syntax

```sql
DECLARE [ OR REPLACE ] [ VARIABLE ]
DECLARE [ OR REPLACE ] [ VAR | VARIABLE ]
variable_name [ data_type ] [ { DEFAULT | = } default_expr ]
```

Expand Down
2 changes: 1 addition & 1 deletion docs/sql-ref-syntax-ddl-drop-variable.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ be thrown if the variable does not exist.
### Syntax

```sql
DROP TEMPORARY VARIABLE [ IF EXISTS ] variable_name
DROP TEMPORARY { VAR | VARIABLE } [ IF EXISTS ] variable_name
```

### Parameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ compoundStatement
;

setStatementWithOptionalVarKeyword
: SET (VARIABLE | VAR)? assignmentList #setVariableWithOptionalKeyword
| SET (VARIABLE | VAR)? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
: SET variable? assignmentList #setVariableWithOptionalKeyword
| SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
LEFT_PAREN query RIGHT_PAREN #setVariableWithOptionalKeyword
;

Expand Down Expand Up @@ -215,9 +215,9 @@ statement
routineCharacteristics
RETURN (query | expression) #createUserDefinedFunction
| DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction
| DECLARE (OR REPLACE)? VARIABLE?
| DECLARE (OR REPLACE)? variable?
identifierReference dataType? variableDefaultExpression? #createVariable
| DROP TEMPORARY VARIABLE (IF EXISTS)? identifierReference #dropVariable
| DROP TEMPORARY variable (IF EXISTS)? identifierReference #dropVariable
| EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
(statement|setResetStatement) #explain
| SHOW TABLES ((FROM | IN) identifierReference)?
Expand Down Expand Up @@ -272,8 +272,8 @@ setResetStatement
| SET TIME ZONE interval #setTimeZone
| SET TIME ZONE timezone #setTimeZone
| SET TIME ZONE .*? #setTimeZone
| SET (VARIABLE | VAR) assignmentList #setVariable
| SET (VARIABLE | VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
| SET variable assignmentList #setVariable
| SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ
LEFT_PAREN query RIGHT_PAREN #setVariable
| SET configKey EQ configValue #setQuotedConfiguration
| SET configKey (EQ .*?)? #setConfiguration
Expand Down Expand Up @@ -438,6 +438,11 @@ namespaces
| SCHEMAS
;

variable
: VARIABLE
| VAR
;

describeFuncName
: identifierReference
| stringLit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ SetVariable [variablereference(system.session.title='Create Variable - Failure C


-- !query
DECLARE VARIABLE var1 INT
DECLARE VAR var1 INT
-- !query analysis
CreateVariable defaultvalueexpression(null, null), false
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
Expand All @@ -164,7 +164,7 @@ Project [variablereference(system.session.var1=CAST(NULL AS INT)) AS var1#x]


-- !query
DROP TEMPORARY VARIABLE var1
DROP TEMPORARY VAR var1
-- !query analysis
DropVariable false
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ DECLARE VARIABLE IF NOT EXISTS var1 INT;
DROP TEMPORARY VARIABLE IF EXISTS var1;

SET VARIABLE title = 'Drop Variable';
DECLARE VARIABLE var1 INT;
DECLARE VAR var1 INT;
SELECT var1;
DROP TEMPORARY VARIABLE var1;
DROP TEMPORARY VAR var1;

-- Variable is gone
SELECT var1;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ struct<>


-- !query
DECLARE VARIABLE var1 INT
DECLARE VAR var1 INT
-- !query schema
struct<>
-- !query output
Expand All @@ -184,7 +184,7 @@ NULL


-- !query
DROP TEMPORARY VARIABLE var1
DROP TEMPORARY VAR var1
-- !query schema
struct<>
-- !query output
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.command

import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedIdentifier, UnresolvedInlineTable}
import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Divide, Literal, ScalarSubquery}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable, DefaultValueExpression, Project, SubqueryAlias}
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.{Decimal, DecimalType, DoubleType, IntegerType, MapType, NullType, StringType}
import org.apache.spark.unsafe.types.UTF8String

class DeclareVariableParserSuite extends AnalysisTest with SharedSparkSession {

test("declare variable") {
comparePlans(
parsePlan("DECLARE var1 INT = 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE var1 INT"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(null, IntegerType), "null"),
replace = false))
comparePlans(
parsePlan("DECLARE var1 = 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE var1 = 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VAR var1 = 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE var1 DEFAULT 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE var1 INT DEFAULT 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE system.session.var1 DEFAULT 1"),
CreateVariable(
UnresolvedIdentifier(Seq("system", "session", "var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE session.var1 DEFAULT 1"),
CreateVariable(
UnresolvedIdentifier(Seq("session", "var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE var1 STRING DEFAULT CURRENT_DATABASE()"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(
Cast(UnresolvedFunction("CURRENT_DATABASE", Nil, isDistinct = false), StringType),
"CURRENT_DATABASE()"),
replace = false))
comparePlans(
parsePlan("DECLARE VARIABLE var1 INT DEFAULT (SELECT c1 FROM VALUES(1) AS T(c1))"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(
Cast(ScalarSubquery(
Project(UnresolvedAttribute("c1") :: Nil,
SubqueryAlias(Seq("T"),
UnresolvedInlineTable(Seq("c1"), Seq(Literal(1)) :: Nil)))), IntegerType),
"(SELECT c1 FROM VALUES(1) AS T(c1))"),
replace = false))
}

test("declare or replace variable") {
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 = 1"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(1, IntegerType), "1"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 DOUBLE DEFAULT 1 + RAND(5)"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(
Cast(
Add(Literal(1, IntegerType),
UnresolvedFunction("RAND", Seq(Literal(5, IntegerType)), isDistinct = false)),
DoubleType),
"1 + RAND(5)"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 DEFAULT NULL"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Literal(null, NullType), "NULL"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE INT DEFAULT 5.0"),
CreateVariable(
UnresolvedIdentifier(Seq("INT")),
DefaultValueExpression(Literal(Decimal("5.0"), DecimalType(2, 1)), "5.0"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 MAP<string, double> " +
"DEFAULT MAP('Hello', 5.1, 'World', -7.1E10)"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Cast(
UnresolvedFunction("MAP", Seq(
Literal(UTF8String.fromString("Hello"), StringType),
Literal(Decimal("5.1"), DecimalType(2, 1)),
Literal(UTF8String.fromString("World"), StringType),
Literal(-7.1E10, DoubleType)), isDistinct = false),
MapType(StringType, DoubleType)),
"MAP('Hello', 5.1, 'World', -7.1E10)"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT NULL"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Cast(Literal(null, NullType), IntegerType), "NULL"),
replace = true))
comparePlans(
parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT 1 / 0"),
CreateVariable(
UnresolvedIdentifier(Seq("var1")),
DefaultValueExpression(Cast(
Divide(Literal(1, IntegerType), Literal(0, IntegerType)), IntegerType),
"1 / 0"),
replace = true))
}

test("declare variable - not support syntax") {
// IF NOT EXISTS
checkError(
exception = intercept[ParseException] {
parsePlan("DECLARE VARIABLE IF NOT EXISTS var1 INT")
},
errorClass = "PARSE_SYNTAX_ERROR",
parameters = Map("error" -> "'EXISTS'", "hint" -> "")
)

// The datatype or default value of a variable must be specified
val sqlText = "DECLARE VARIABLE var1"
checkError(
exception = intercept[ParseException] {
parsePlan(sqlText)
},
errorClass = "INVALID_SQL_SYNTAX.VARIABLE_TYPE_OR_DEFAULT_REQUIRED",
parameters = Map.empty,
context = ExpectedContext(fragment = sqlText, start = 0, stop = 20)
)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.execution.command

import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedIdentifier}
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan
import org.apache.spark.sql.catalyst.parser.ParseException
import org.apache.spark.sql.catalyst.plans.logical.DropVariable
import org.apache.spark.sql.test.SharedSparkSession

class DropVariableParserSuite extends AnalysisTest with SharedSparkSession {

test("drop variable") {
comparePlans(
parsePlan("DROP TEMPORARY VARIABLE var1"),
DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
comparePlans(
parsePlan("DROP TEMPORARY VAR var1"),
DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false))
comparePlans(
parsePlan("DROP TEMPORARY VARIABLE IF EXISTS var1"),
DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = true))
}

test("drop variable - not support syntax 'DROP VARIABLE|VAR'") {
checkError(
exception = intercept[ParseException] {
parsePlan("DROP VARIABLE var1")
},
errorClass = "PARSE_SYNTAX_ERROR",
parameters = Map("error" -> "'VARIABLE'", "hint" -> "")
)
checkError(
exception = intercept[ParseException] {
parsePlan("DROP VAR var1")
},
errorClass = "PARSE_SYNTAX_ERROR",
parameters = Map("error" -> "'VAR'", "hint" -> "")
)
}
}

0 comments on commit 75adc39

Please sign in to comment.