From 75adc39ee486c8427e94939b5350c8f6a6aa70cf Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 24 Jul 2024 22:52:04 +0800 Subject: [PATCH] [SPARK-48990][SQL] Unified variable related SQL syntax keywords ### What changes were proposed in this pull request? The pr aims to unified `variable` related `SQL syntax` keywords, enable syntax `DECLARE (OR REPLACE)? ...` and `DROP TEMPORARY ...` to support keyword: `VAR` (not only `VARIABLE`). ### Why are the changes needed? When `setting` variables, we support `(VARIABLE | VAR)`, but when `declaring` and `dropping` variables, we only support the keyword `VARIABLE` (not support the keyword `VAR`) image https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L68-L72 https://github.com/apache/spark/blob/285489b0225004e918b6e937f7367e492292815e/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4#L218-L220 The syntax seems `a bit weird`, `inconsistent experience` in SQL syntax related to variable usage by end-users, so I propose to `unify` it. ### Does this PR introduce _any_ user-facing change? Yes, enable end-users to use `variable related SQL` with `consistent` keywords. ### How was this patch tested? Updated existed UT. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47469 from panbingkun/SPARK-48990. Authored-by: panbingkun Signed-off-by: Wenchen Fan --- docs/sql-ref-syntax-ddl-declare-variable.md | 2 +- docs/sql-ref-syntax-ddl-drop-variable.md | 2 +- .../sql/catalyst/parser/SqlBaseParser.g4 | 17 +- .../sql-session-variables.sql.out | 4 +- .../inputs/sql-session-variables.sql | 4 +- .../results/sql-session-variables.sql.out | 4 +- .../command/DeclareVariableParserSuite.scala | 188 ++++++++++++++++++ .../command/DropVariableParserSuite.scala | 56 ++++++ 8 files changed, 263 insertions(+), 14 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala diff --git a/docs/sql-ref-syntax-ddl-declare-variable.md b/docs/sql-ref-syntax-ddl-declare-variable.md index 518770c4496ea..ba9857bf1917a 100644 --- a/docs/sql-ref-syntax-ddl-declare-variable.md +++ b/docs/sql-ref-syntax-ddl-declare-variable.md @@ -34,7 +34,7 @@ column default expressions, and generated column expressions. ### Syntax ```sql -DECLARE [ OR REPLACE ] [ VARIABLE ] +DECLARE [ OR REPLACE ] [ VAR | VARIABLE ] variable_name [ data_type ] [ { DEFAULT | = } default_expr ] ``` diff --git a/docs/sql-ref-syntax-ddl-drop-variable.md b/docs/sql-ref-syntax-ddl-drop-variable.md index f58d944317d5e..5b2b0da769459 100644 --- a/docs/sql-ref-syntax-ddl-drop-variable.md +++ b/docs/sql-ref-syntax-ddl-drop-variable.md @@ -27,7 +27,7 @@ be thrown if the variable does not exist. ### Syntax ```sql -DROP TEMPORARY VARIABLE [ IF EXISTS ] variable_name +DROP TEMPORARY { VAR | VARIABLE } [ IF EXISTS ] variable_name ``` ### Parameters diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index a50051715e207..c7aa56cf920ad 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -66,8 +66,8 @@ compoundStatement ; setStatementWithOptionalVarKeyword - : SET (VARIABLE | VAR)? assignmentList #setVariableWithOptionalKeyword - | SET (VARIABLE | VAR)? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ + : SET variable? assignmentList #setVariableWithOptionalKeyword + | SET variable? LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ LEFT_PAREN query RIGHT_PAREN #setVariableWithOptionalKeyword ; @@ -215,9 +215,9 @@ statement routineCharacteristics RETURN (query | expression) #createUserDefinedFunction | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction - | DECLARE (OR REPLACE)? VARIABLE? + | DECLARE (OR REPLACE)? variable? identifierReference dataType? variableDefaultExpression? #createVariable - | DROP TEMPORARY VARIABLE (IF EXISTS)? identifierReference #dropVariable + | DROP TEMPORARY variable (IF EXISTS)? identifierReference #dropVariable | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)? (statement|setResetStatement) #explain | SHOW TABLES ((FROM | IN) identifierReference)? @@ -272,8 +272,8 @@ setResetStatement | SET TIME ZONE interval #setTimeZone | SET TIME ZONE timezone #setTimeZone | SET TIME ZONE .*? #setTimeZone - | SET (VARIABLE | VAR) assignmentList #setVariable - | SET (VARIABLE | VAR) LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ + | SET variable assignmentList #setVariable + | SET variable LEFT_PAREN multipartIdentifierList RIGHT_PAREN EQ LEFT_PAREN query RIGHT_PAREN #setVariable | SET configKey EQ configValue #setQuotedConfiguration | SET configKey (EQ .*?)? #setConfiguration @@ -438,6 +438,11 @@ namespaces | SCHEMAS ; +variable + : VARIABLE + | VAR + ; + describeFuncName : identifierReference | stringLit diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out index f5ce5ed2e8b6e..eb48f0d9a28f0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out @@ -150,7 +150,7 @@ SetVariable [variablereference(system.session.title='Create Variable - Failure C -- !query -DECLARE VARIABLE var1 INT +DECLARE VAR var1 INT -- !query analysis CreateVariable defaultvalueexpression(null, null), false +- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1 @@ -164,7 +164,7 @@ Project [variablereference(system.session.var1=CAST(NULL AS INT)) AS var1#x] -- !query -DROP TEMPORARY VARIABLE var1 +DROP TEMPORARY VAR var1 -- !query analysis DropVariable false +- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.var1 diff --git a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql index 2dd205adfa04b..d876be1bb6bcd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/sql-session-variables.sql @@ -26,9 +26,9 @@ DECLARE VARIABLE IF NOT EXISTS var1 INT; DROP TEMPORARY VARIABLE IF EXISTS var1; SET VARIABLE title = 'Drop Variable'; -DECLARE VARIABLE var1 INT; +DECLARE VAR var1 INT; SELECT var1; -DROP TEMPORARY VARIABLE var1; +DROP TEMPORARY VAR var1; -- Variable is gone SELECT var1; diff --git a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out index 67f867e257419..73d3ec737085f 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-session-variables.sql.out @@ -168,7 +168,7 @@ struct<> -- !query -DECLARE VARIABLE var1 INT +DECLARE VAR var1 INT -- !query schema struct<> -- !query output @@ -184,7 +184,7 @@ NULL -- !query -DROP TEMPORARY VARIABLE var1 +DROP TEMPORARY VAR var1 -- !query schema struct<> -- !query output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala new file mode 100644 index 0000000000000..a292afe6a7c28 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DeclareVariableParserSuite.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedAttribute, UnresolvedFunction, UnresolvedIdentifier, UnresolvedInlineTable} +import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Divide, Literal, ScalarSubquery} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.{CreateVariable, DefaultValueExpression, Project, SubqueryAlias} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{Decimal, DecimalType, DoubleType, IntegerType, MapType, NullType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +class DeclareVariableParserSuite extends AnalysisTest with SharedSparkSession { + + test("declare variable") { + comparePlans( + parsePlan("DECLARE var1 INT = 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE var1 INT"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(null, IntegerType), "null"), + replace = false)) + comparePlans( + parsePlan("DECLARE var1 = 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE var1 = 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VAR var1 = 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE var1 DEFAULT 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE var1 INT DEFAULT 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Cast(Literal(1, IntegerType), IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE system.session.var1 DEFAULT 1"), + CreateVariable( + UnresolvedIdentifier(Seq("system", "session", "var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE session.var1 DEFAULT 1"), + CreateVariable( + UnresolvedIdentifier(Seq("session", "var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE var1 STRING DEFAULT CURRENT_DATABASE()"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression( + Cast(UnresolvedFunction("CURRENT_DATABASE", Nil, isDistinct = false), StringType), + "CURRENT_DATABASE()"), + replace = false)) + comparePlans( + parsePlan("DECLARE VARIABLE var1 INT DEFAULT (SELECT c1 FROM VALUES(1) AS T(c1))"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression( + Cast(ScalarSubquery( + Project(UnresolvedAttribute("c1") :: Nil, + SubqueryAlias(Seq("T"), + UnresolvedInlineTable(Seq("c1"), Seq(Literal(1)) :: Nil)))), IntegerType), + "(SELECT c1 FROM VALUES(1) AS T(c1))"), + replace = false)) + } + + test("declare or replace variable") { + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 = 1"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(1, IntegerType), "1"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 DOUBLE DEFAULT 1 + RAND(5)"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression( + Cast( + Add(Literal(1, IntegerType), + UnresolvedFunction("RAND", Seq(Literal(5, IntegerType)), isDistinct = false)), + DoubleType), + "1 + RAND(5)"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 DEFAULT NULL"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Literal(null, NullType), "NULL"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE INT DEFAULT 5.0"), + CreateVariable( + UnresolvedIdentifier(Seq("INT")), + DefaultValueExpression(Literal(Decimal("5.0"), DecimalType(2, 1)), "5.0"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 MAP " + + "DEFAULT MAP('Hello', 5.1, 'World', -7.1E10)"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Cast( + UnresolvedFunction("MAP", Seq( + Literal(UTF8String.fromString("Hello"), StringType), + Literal(Decimal("5.1"), DecimalType(2, 1)), + Literal(UTF8String.fromString("World"), StringType), + Literal(-7.1E10, DoubleType)), isDistinct = false), + MapType(StringType, DoubleType)), + "MAP('Hello', 5.1, 'World', -7.1E10)"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT NULL"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Cast(Literal(null, NullType), IntegerType), "NULL"), + replace = true)) + comparePlans( + parsePlan("DECLARE OR REPLACE VARIABLE var1 INT DEFAULT 1 / 0"), + CreateVariable( + UnresolvedIdentifier(Seq("var1")), + DefaultValueExpression(Cast( + Divide(Literal(1, IntegerType), Literal(0, IntegerType)), IntegerType), + "1 / 0"), + replace = true)) + } + + test("declare variable - not support syntax") { + // IF NOT EXISTS + checkError( + exception = intercept[ParseException] { + parsePlan("DECLARE VARIABLE IF NOT EXISTS var1 INT") + }, + errorClass = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'EXISTS'", "hint" -> "") + ) + + // The datatype or default value of a variable must be specified + val sqlText = "DECLARE VARIABLE var1" + checkError( + exception = intercept[ParseException] { + parsePlan(sqlText) + }, + errorClass = "INVALID_SQL_SYNTAX.VARIABLE_TYPE_OR_DEFAULT_REQUIRED", + parameters = Map.empty, + context = ExpectedContext(fragment = sqlText, start = 0, stop = 20) + ) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala new file mode 100644 index 0000000000000..f2af7e5dedca0 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DropVariableParserSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedIdentifier} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.DropVariable +import org.apache.spark.sql.test.SharedSparkSession + +class DropVariableParserSuite extends AnalysisTest with SharedSparkSession { + + test("drop variable") { + comparePlans( + parsePlan("DROP TEMPORARY VARIABLE var1"), + DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false)) + comparePlans( + parsePlan("DROP TEMPORARY VAR var1"), + DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = false)) + comparePlans( + parsePlan("DROP TEMPORARY VARIABLE IF EXISTS var1"), + DropVariable(UnresolvedIdentifier(Seq("var1")), ifExists = true)) + } + + test("drop variable - not support syntax 'DROP VARIABLE|VAR'") { + checkError( + exception = intercept[ParseException] { + parsePlan("DROP VARIABLE var1") + }, + errorClass = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'VARIABLE'", "hint" -> "") + ) + checkError( + exception = intercept[ParseException] { + parsePlan("DROP VAR var1") + }, + errorClass = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'VAR'", "hint" -> "") + ) + } +}