-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-30758][SQL][TESTS] Improve bracketed comments tests. #27481
Changes from 10 commits
2f3a54c
4619ac7
c743bf3
7f21b1b
71e4c43
5a70f00
3f8497f
a512664
38005e8
fa8397e
900cc73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,9 @@ package org.apache.spark.sql | |
|
||
import java.io.File | ||
import java.util.{Locale, TimeZone} | ||
import java.util.regex.Pattern | ||
|
||
import scala.collection.mutable.{ArrayBuffer, HashMap} | ||
import scala.util.control.NonFatal | ||
|
||
import org.apache.spark.{SparkConf, SparkException} | ||
|
@@ -62,7 +64,12 @@ import org.apache.spark.tags.ExtendedSQLTest | |
* }}} | ||
* | ||
* The format for input files is simple: | ||
* 1. A list of SQL queries separated by semicolon. | ||
* 1. A list of SQL queries separated by semicolons by default. If the semicolon cannot effectively | ||
* separate the SQL queries in the test file (e.g. bracketed comments), please use | ||
* --QUERY-DELIMITER-START and --QUERY-DELIMITER-END. Lines starting with | ||
* --QUERY-DELIMITER-START and --QUERY-DELIMITER-END represent the beginning and end of a query, | ||
* respectively. Code that is not surrounded by lines that begin with --QUERY-DELIMITER-START | ||
* and --QUERY-DELIMITER-END is still separated by semicolons. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is better than my original idea. We only need to use this special delimiter for queries that need it. Good job! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! |
||
* 2. Lines starting with -- are treated as comments and ignored. | ||
* 3. Lines starting with --SET are used to specify the configs when running this testing file. You | ||
* can set multiple configs in one --SET, using comma to separate them. Or you can use multiple | ||
|
@@ -246,9 +253,15 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
|
||
/** Run a test case. */ | ||
protected def runTest(testCase: TestCase): Unit = { | ||
// Joins the given lines with "\n" and splits the result on every ';' that is
// immediately preceded by a character other than a backslash, so "\;" is not a split point.
def splitWithSemicolon(seq: Seq[String]) = { | ||
seq.mkString("\n").split("(?<=[^\\\\]);") | ||
} | ||
val input = fileToString(new File(testCase.inputFile)) | ||
|
||
val (comments, code) = input.split("\n").partition(_.trim.startsWith("--")) | ||
val (comments, code) = input.split("\n").partition { line => | ||
val newLine = line.trim | ||
newLine.startsWith("--") && !newLine.startsWith("--QUERY-DELIMITER") | ||
} | ||
|
||
// If `--IMPORT` found, load code from another test case file, then insert them | ||
// into the head in this test. | ||
|
@@ -261,10 +274,38 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession { | |
} | ||
}.flatten | ||
|
||
val allCode = importedCode ++ code | ||
val tempQueries = if (allCode.exists(_.trim.startsWith("--QUERY-DELIMITER"))) { | ||
// Although the loop is heavy, it is only used for the nested comments test. | ||
val querys = new ArrayBuffer[String] | ||
val otherCodes = new ArrayBuffer[String] | ||
var tempStr = "" | ||
var start = false | ||
for (c <- allCode) { | ||
if (c.trim.startsWith("--QUERY-DELIMITER-START")) { | ||
start = true | ||
querys ++= splitWithSemicolon(otherCodes.toSeq) | ||
otherCodes.clear() | ||
} else if (c.trim.startsWith("--QUERY-DELIMITER-END")) { | ||
start = false | ||
querys += s"\n${tempStr.stripSuffix(";")}" | ||
tempStr = "" | ||
} else if (start) { | ||
tempStr += s"\n$c" | ||
} else { | ||
otherCodes += c | ||
} | ||
} | ||
if (otherCodes.nonEmpty) { | ||
querys ++= splitWithSemicolon(otherCodes.toSeq) | ||
} | ||
querys.toSeq | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After the loop ends, it's possible that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. I forgot it. |
||
} else { | ||
splitWithSemicolon(allCode).toSeq | ||
} | ||
|
||
// List of SQL queries to run | ||
// note: this is not a robust way to split queries using semicolon, but works for now. | ||
val queries = (importedCode ++ code).mkString("\n").split("(?<=[^\\\\]);") | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.map(_.trim).filter(_ != "").toSeq | ||
val queries = tempQueries.map(_.trim).filter(_ != "").toSeq | ||
// Fix misplacement when comment is at the end of the query. | ||
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_ != "") | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, the output is pretty nice.