[SPARK-30758][SQL][TESTS] Improve bracketed comments tests
### What changes were proposed in this pull request?
Although Spark SQL supports bracketed comments, `SQLQueryTestSuite` can't handle them well, which leads to generated golden files that don't display bracketed comments correctly.
This PR improves the treatment of bracketed comments and adds three test cases in `PlanParserSuite`.
Spark SQL can't support nested bracketed comments yet; #27495 is intended to add that support.
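
As a minimal sketch of the underlying problem (standalone Scala, not the suite itself; the SQL string is hypothetical), the suite's semicolon split lands inside a bracketed comment whenever the comment contains a `;`:

```scala
object SplitDemo extends App {
  // A bracketed comment that contains a semicolon.
  val sql =
    """/* this comment has a semicolon; it is not a separator */
      |SELECT 'after comment' AS c""".stripMargin

  // Same split the suite uses: any ';' not preceded by a backslash.
  sql.split("(?<=[^\\\\]);").foreach(q => println(s"---$q"))
  // Prints two fragments: the first ends mid-comment and cannot parse,
  // which is why the golden files came out garbled.
}
```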

### Why are the changes needed?
The generated golden files don't display bracketed comments correctly.

### Does this PR introduce any user-facing change?
No

### How was this patch tested?
New UT.

Closes #27481 from beliefer/ansi-brancket-comments.

Authored-by: beliefer <beliefer@163.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
beliefer authored and cloud-fan committed Feb 13, 2020
1 parent a6b4b91 commit 04604b9
Showing 3 changed files with 78 additions and 116 deletions.
sql/core/src/test/resources/sql-tests/inputs/comments.sql
@@ -11,17 +11,19 @@ SELECT /* embedded single line */ 'embedded' AS `second`;
SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line

SELECT 'before multi-line' AS fourth;
--QUERY-DELIMITER-START
-- [SPARK-28880] ANSI SQL: Bracketed comments
/* This is an example of SQL which should not execute:
* select 'multi-line';
*/
SELECT 'after multi-line' AS fifth;
--QUERY-DELIMITER-END

-- [SPARK-28880] ANSI SQL: Bracketed comments
--
-- Nested comments
--

--QUERY-DELIMITER-START
/*
SELECT 'trailing' as x1; -- inside block comment
*/
@@ -44,5 +46,5 @@ Hoo boy. Still two deep...
Now just one deep...
*/
'deeply nested example' AS sixth;

--QUERY-DELIMITER-END
/* and this is the end of the file */
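
The nested block above is exactly what still fails to parse. A quick hypothetical check in spark-shell (on a build without #27495, which is meant to add nested-comment support) shows the behavior the golden file below records:

```scala
// Hypothetical spark-shell session on a Spark build without #27495.
// A flat bracketed comment is fine:
spark.sql("/* flat comment */ SELECT 1").show()

// But the lexer ends the comment at the first "*/", so the rest of a
// nested comment leaks out as SQL and raises
// org.apache.spark.sql.catalyst.parser.ParseException:
spark.sql("/* outer /* inner */ SELECT 'leaked' */ SELECT 2").show()
```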
sql/core/src/test/resources/sql-tests/results/comments.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 13
-- Number of queries: 7


-- !query
@@ -36,129 +36,32 @@ before multi-line

-- !query
/* This is an example of SQL which should not execute:
* select 'multi-line'
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* This is an example of SQL which should not execute:
^^^
* select 'multi-line'


-- !query
*/
* select 'multi-line';
*/
SELECT 'after multi-line' AS fifth
-- !query schema
struct<>
struct<fifth:string>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^
SELECT 'after multi-line' AS fifth
after multi-line


-- !query
/*
SELECT 'trailing' as x1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/*
^^^
SELECT 'trailing' as x1


-- !query
SELECT 'trailing' as x1; -- inside block comment
*/

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2


-- !query
SELECT /* embedded single line */ 'embedded' AS x2;
*/

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
*/
^^^

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1


-- !query
SELECT 'deep nest' AS n1;
/* Second level of nesting...
SELECT 'deeper nest' as n2
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* Second level of nesting...
^^^
SELECT 'deeper nest' as n2


-- !query
SELECT 'deeper nest' as n2;
/* Third level of nesting...
SELECT 'deepest nest' as n3
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)

== SQL ==
/* Third level of nesting...
^^^
SELECT 'deepest nest' as n3


-- !query
SELECT 'deepest nest' as n3;
*/
Hoo boy. Still two deep...
*/
@@ -170,11 +73,27 @@ struct<>
-- !query output
org.apache.spark.sql.catalyst.parser.ParseException

mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0)
mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34)

== SQL ==
/*
SELECT 'trailing' as x1; -- inside block comment
*/

/* This block comment surrounds a query which itself has a block comment...
SELECT /* embedded single line */ 'embedded' AS x2;
----------------------------------^^^
*/

SELECT -- continued after the following block comments...
/* Deeply nested comment.
This includes a single apostrophe to make sure we aren't decoding this part as a string.
SELECT 'deep nest' AS n1;
/* Second level of nesting...
SELECT 'deeper nest' as n2;
/* Third level of nesting...
SELECT 'deepest nest' as n3;
*/
^^^
Hoo boy. Still two deep...
*/
Now just one deep...
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -19,7 +19,9 @@ package org.apache.spark.sql

import java.io.File
import java.util.{Locale, TimeZone}
import java.util.regex.Pattern

import scala.collection.mutable.{ArrayBuffer, HashMap}
import scala.util.control.NonFatal

import org.apache.spark.{SparkConf, SparkException}
@@ -62,7 +64,12 @@ import org.apache.spark.tags.ExtendedSQLTest
* }}}
*
* The format for input files is simple:
* 1. A list of SQL queries separated by semicolon.
* 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively
*    separate the SQL queries in the test file (e.g. bracketed comments), please use
*    --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with
*    --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query,
*    respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START
*    and --QUERY-DELIMITER-END is still separated by semicolons.
* 2. Lines starting with -- are treated as comments and ignored.
* 3. Lines starting with --SET are used to specify the configs when running this testing file. You
* can set multiple configs in one --SET, using comma to separate them. Or you can use multiple
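
For illustration of the format list above, a hypothetical input file (not one from the repo) mixing both separation styles could look like this, shown as a Scala string to stay in the suite's language:

```scala
// Hypothetical input-file content mixing both separation styles.
val exampleInput =
  """-- ordinary comment, ignored
    |SELECT 1;
    |SELECT 2;
    |--QUERY-DELIMITER-START
    |/* a bracketed comment with a semicolon; still one query */
    |SELECT 3;
    |--QUERY-DELIMITER-END
    |SELECT 4;""".stripMargin
// The suite would extract four queries: SELECT 1, SELECT 2,
// the whole delimited block, and SELECT 4.
```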
@@ -246,9 +253,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {

/** Run a test case. */
protected def runTest(testCase: TestCase): Unit = {
def splitWithSemicolon(seq: Seq[String]) = {
seq.mkString("\n").split("(?<=[^\\\\]);")
}
val input = fileToString(new File(testCase.inputFile))

val (comments, code) = input.split("\n").partition(_.trim.startsWith("--"))
val (comments, code) = input.split("\n").partition { line =>
val newLine = line.trim
newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER")
}

// If `--IMPORT` found, load code from another test case file, then insert them
// into the head in this test.
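
A minimal, standalone sketch of the new partition predicate above (with hypothetical input lines): delimiter markers begin with `--` like comments, but must survive the comment-stripping pass:

```scala
val lines = Seq(
  "-- a real comment",
  "--QUERY-DELIMITER-START",
  "SELECT 1;",
  "--QUERY-DELIMITER-END")

// Same predicate as in the diff above.
val (comments, code) = lines.partition { line =>
  val trimmed = line.trim
  trimmed.startsWith("--") && !trimmed.startsWith("--QUERY-DELIMITER")
}
// comments == Seq("-- a real comment")
// code == Seq("--QUERY-DELIMITER-START", "SELECT 1;", "--QUERY-DELIMITER-END")
```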
@@ -261,10 +274,38 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession {
}
}.flatten

val allCode = importedCode ++ code
val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) {
// Although this loop is heavy, it is only used for the bracketed comments tests.
val querys = new ArrayBuffer[String]
val otherCodes = new ArrayBuffer[String]
var tempStr = ""
var start = false
for (c <- allCode) {
if (c.trim.startsWith("--QUERY-DELIMITER-START")) {
start = true
querys ++= splitWithSemicolon(otherCodes.toSeq)
otherCodes.clear()
} else if (c.trim.startsWith("--QUERY-DELIMITER-END")) {
start = false
querys += s"\n${tempStr.stripSuffix(";")}"
tempStr = ""
} else if (start) {
tempStr += s"\n$c"
} else {
otherCodes += c
}
}
if (otherCodes.nonEmpty) {
querys ++= splitWithSemicolon(otherCodes.toSeq)
}
querys.toSeq
} else {
splitWithSemicolon(allCode).toSeq
}

// List of SQL queries to run
// note: this is not a robust way to split queries using semicolon, but works for now.
val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);")
.map(_.trim).filter(_ != "").toSeq
val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
// Fix misplacement when comment is at the end of the query.
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")

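
For an end-to-end view, the following is a hedged, standalone re-creation of the grouping pass above (same control flow, simplified names, hypothetical input), showing how a delimited block becomes a single query:

```scala
import scala.collection.mutable.ArrayBuffer

object GroupingDemo extends App {
  // Same split the suite uses: any ';' not preceded by a backslash.
  def splitWithSemicolon(seq: Seq[String]): Array[String] =
    seq.mkString("\n").split("(?<=[^\\\\]);")

  def group(allCode: Seq[String]): Seq[String] = {
    val queries = new ArrayBuffer[String]
    val others = new ArrayBuffer[String]
    var tempStr = ""
    var inBlock = false
    for (line <- allCode) {
      if (line.trim.startsWith("--QUERY-DELIMITER-START")) {
        // Flush everything before the block, split on semicolons.
        inBlock = true
        queries ++= splitWithSemicolon(others.toSeq)
        others.clear()
      } else if (line.trim.startsWith("--QUERY-DELIMITER-END")) {
        // The whole delimited block is one query.
        inBlock = false
        queries += s"\n${tempStr.stripSuffix(";")}"
        tempStr = ""
      } else if (inBlock) {
        tempStr += s"\n$line"
      } else {
        others += line
      }
    }
    if (others.nonEmpty) queries ++= splitWithSemicolon(others.toSeq)
    queries.toSeq.map(_.trim).filter(_.nonEmpty)
  }

  group(Seq(
    "SELECT 1;",
    "--QUERY-DELIMITER-START",
    "/* one query, despite the ; here */",
    "SELECT 2;",
    "--QUERY-DELIMITER-END",
    "SELECT 3;")).foreach(q => println(s"<<$q>>"))
  // Prints three queries: <<SELECT 1>>, the delimited block
  // (comment plus SELECT 2) as one query, and <<SELECT 3>>.
}
```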
