diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql index 6725ce45e72a5..1a454179ef79f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/comments.sql @@ -11,17 +11,19 @@ SELECT /* embedded single line */ 'embedded' AS `second`; SELECT /* both embedded and trailing single line */ 'both' AS third; -- trailing single line SELECT 'before multi-line' AS fourth; +--QUERY-DELIMITER-START -- [SPARK-28880] ANSI SQL: Bracketed comments /* This is an example of SQL which should not execute: * select 'multi-line'; */ SELECT 'after multi-line' AS fifth; +--QUERY-DELIMITER-END -- [SPARK-28880] ANSI SQL: Bracketed comments -- -- Nested comments -- - +--QUERY-DELIMITER-START /* SELECT 'trailing' as x1; -- inside block comment */ @@ -44,5 +46,5 @@ Hoo boy. Still two deep... Now just one deep... */ 'deeply nested example' AS sixth; - +--QUERY-DELIMITER-END /* and this is the end of the file */ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out index 4ea49013a62d1..637c5561bd940 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/comments.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 13 +-- Number of queries: 7 -- !query @@ -36,129 +36,32 @@ before multi-line -- !query /* This is an example of SQL which should not execute: - * select 'multi-line' --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* This is an example of SQL which should not execute: -^^^ - * select 'multi-line' - - --- !query -*/ + * select 'multi-line'; + */ SELECT 'after multi-line' AS fifth -- !query schema -struct<> +struct -- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ -SELECT 'after multi-line' AS fifth +after multi-line -- !query /* -SELECT 'trailing' as x1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* -^^^ -SELECT 'trailing' as x1 - - --- !query +SELECT 'trailing' as x1; -- inside block comment */ /* This block comment surrounds a query which itself has a block comment... -SELECT /* embedded single line */ 'embedded' AS x2 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ - -/* This block comment surrounds a query which itself has a block comment... -SELECT /* embedded single line */ 'embedded' AS x2 - - --- !query +SELECT /* embedded single line */ 'embedded' AS x2; */ SELECT -- continued after the following block comments... /* Deeply nested comment. This includes a single apostrophe to make sure we aren't decoding this part as a string. -SELECT 'deep nest' AS n1 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -extraneous input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -*/ -^^^ - -SELECT -- continued after the following block comments... -/* Deeply nested comment. - This includes a single apostrophe to make sure we aren't decoding this part as a string. -SELECT 'deep nest' AS n1 - - --- !query +SELECT 'deep nest' AS n1; /* Second level of nesting... -SELECT 'deeper nest' as n2 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* Second level of nesting... -^^^ -SELECT 'deeper nest' as n2 - - --- !query +SELECT 'deeper nest' as n2; /* Third level of nesting... -SELECT 'deepest nest' as n3 --- !query schema -struct<> --- !query output -org.apache.spark.sql.catalyst.parser.ParseException - -mismatched input '/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) - -== SQL == -/* Third level of nesting... -^^^ -SELECT 'deepest nest' as n3 - - --- !query +SELECT 'deepest nest' as n3; */ Hoo boy. Still two deep... */ @@ -170,11 +73,27 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -mismatched input '*/' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 1, pos 0) +mismatched input ''embedded'' expecting {'(', 'ADD', 'ALTER', 'ANALYZE', 'CACHE', 'CLEAR', 'COMMENT', 'COMMIT', 'CREATE', 'DELETE', 'DESC', 'DESCRIBE', 'DFS', 'DROP', 'EXPLAIN', 'EXPORT', 'FROM', 'GRANT', 'IMPORT', 'INSERT', 'LIST', 'LOAD', 'LOCK', 'MAP', 'MERGE', 'MSCK', 'REDUCE', 'REFRESH', 'REPLACE', 'RESET', 'REVOKE', 'ROLLBACK', 'SELECT', 'SET', 'SHOW', 'START', 'TABLE', 'TRUNCATE', 'UNCACHE', 'UNLOCK', 'UPDATE', 'USE', 'VALUES', 'WITH'}(line 6, pos 34) == SQL == +/* +SELECT 'trailing' as x1; -- inside block comment +*/ + +/* This block comment surrounds a query which itself has a block comment... +SELECT /* embedded single line */ 'embedded' AS x2; +----------------------------------^^^ +*/ + +SELECT -- continued after the following block comments... +/* Deeply nested comment. + This includes a single apostrophe to make sure we aren't decoding this part as a string. +SELECT 'deep nest' AS n1; +/* Second level of nesting... +SELECT 'deeper nest' as n2; +/* Third level of nesting... +SELECT 'deepest nest' as n3; */ -^^^ Hoo boy. Still two deep... */ Now just one deep... diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 6b9e5bbd3c961..da4727f6a98cb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -19,7 +19,9 @@ package org.apache.spark.sql import java.io.File import java.util.{Locale, TimeZone} +import java.util.regex.Pattern +import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.util.control.NonFatal import org.apache.spark.{SparkConf, SparkException} @@ -62,7 +64,12 @@ import org.apache.spark.tags.ExtendedSQLTest * }}} * * The format for input files is simple: - * 1. A list of SQL queries separated by semicolon. + * 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively + * separate the SQL queries in the test file(e.g. bracketed comments), please use + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with + * --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query, + * respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START + * and --QUERY-DELIMITER-END is still separated by semicolons. * 2. Lines starting with -- are treated as comments and ignored. * 3. Lines starting with --SET are used to specify the configs when running this testing file. You * can set multiple configs in one --SET, using comma to separate them. Or you can use multiple @@ -246,9 +253,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { /** Run a test case. */ protected def runTest(testCase: TestCase): Unit = { + def splitWithSemicolon(seq: Seq[String]) = { + seq.mkString("\n").split("(?<=[^\\\\]);") + } val input = fileToString(new File(testCase.inputFile)) - val (comments, code) = input.split("\n").partition(_.trim.startsWith("--")) + val (comments, code) = input.split("\n").partition { line => + val newLine = line.trim + newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") + } // If `--IMPORT` found, load code from another test case file, then insert them // into the head in this test. @@ -261,10 +274,38 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { } }.flatten + val allCode = importedCode ++ code + val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { + // Although the loop is heavy, only used for bracketed comments test. + val querys = new ArrayBuffer[String] + val otherCodes = new ArrayBuffer[String] + var tempStr = "" + var start = false + for (c <- allCode) { + if (c.trim.startsWith("--QUERY-DELIMITER-START")) { + start = true + querys ++= splitWithSemicolon(otherCodes.toSeq) + otherCodes.clear() + } else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { + start = false + querys += s"\n${tempStr.stripSuffix(";")}" + tempStr = "" + } else if (start) { + tempStr += s"\n$c" + } else { + otherCodes += c + } + } + if (otherCodes.nonEmpty) { + querys ++= splitWithSemicolon(otherCodes.toSeq) + } + querys.toSeq + } else { + splitWithSemicolon(allCode).toSeq + } + // List of SQL queries to run - // note: this is not a robust way to split queries using semicolon, but works for now. - val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);") - .map(_.trim).filter(_ != "").toSeq + val queries = tempQueries.map(_.trim).filter(_ != "").toSeq // Fix misplacement when comment is at the end of the query. .map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "")