[SPARK-44268][CORE][TEST][FOLLOWUP] Add test to generate sql-error-conditions doc automatically #41865

Closed
2 changes: 1 addition & 1 deletion common/utils/src/main/resources/error/error-classes.json
@@ -1214,7 +1214,7 @@
},
"UNEXPECTED_TOKEN" : {
"message" : [
"Found the unexpected <token> in the format string; the structure of the format string must match: [MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]."
"Found the unexpected <token> in the format string; the structure of the format string must match: `[MI|S]` `[$]` `[0|9|G|,]*` `[.|D]` `[0|9]*` `[$]` `[PR|MI|S]`."
]
},
"WRONG_NUM_DIGIT" : {
219 changes: 176 additions & 43 deletions core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala
@@ -20,8 +20,10 @@ package org.apache.spark
import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.Locale

import scala.util.Properties.lineSeparator
import scala.util.matching.Regex

import com.fasterxml.jackson.annotation.JsonInclude.Include
import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION
@@ -45,6 +47,12 @@ class SparkThrowableSuite extends SparkFunSuite {
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\""
}}}

To regenerate the error class document, run:
{{{
SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \
"core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\""
}}}
*/
private val errorJsonFilePath = getWorkspaceFilePath(
"common", "utils", "src", "main", "resources", "error", "error-classes.json")
@@ -141,54 +149,179 @@
checkIfUnique(messageFormats)
}

test("SPARK-44268: Error classes match with document") {
val sqlstateDoc = "sql-error-conditions-sqlstates.md"
test("Error classes match with document") {
val errors = errorReader.errorInfoMap

// the list of error class names whose message parameters should not be quoted
val contentQuoteBlackList = Seq(
"INCOMPLETE_TYPE_DEFINITION.MAP",
"INCOMPLETE_TYPE_DEFINITION.STRUCT")

def quoteParameter(content: String, errorName: String): String = {
if (contentQuoteBlackList.contains(errorName)) {
content
} else {
"<(.*?)>".r.replaceAllIn(content, (m: Regex.Match) => {
val matchStr = m.group(1)
if (matchStr.nonEmpty) {
s"`<$matchStr>`"
} else {
m.matched
}
}).replaceAll("%(.*?)\\$", "`\\%$1\\$`")
}
}
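// A minimal illustration (not from the patch itself), assuming a hypothetical
// error name "SOME_ERROR" that is not on the list above:
//   quoteParameter("Found the unexpected <token>.", "SOME_ERROR")
//     returns "Found the unexpected `<token>`."
// and a printf-style placeholder such as "%1$" would become "`%1$`".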

val sqlStates = IOUtils.toString(getWorkspaceFilePath("docs",
"sql-error-conditions-sqlstates.md").toUri, StandardCharsets.UTF_8).split("\n")
.filter(_.startsWith("##")).map(s => {

val errorHeader = s.split("[`|:|#|\\s]+").filter(_.nonEmpty)
val sqlState = errorHeader(1)
(sqlState, errorHeader.head.toLowerCase(Locale.ROOT) + "-" + sqlState + "-" +
errorHeader.takeRight(errorHeader.length - 2).mkString("-").toLowerCase(Locale.ROOT))
}).toMap
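// Illustration, assuming sql-error-conditions-sqlstates.md contains a header such as
// "## Class `42`: Syntax Error or Access Rule Violation": the entry built here is
// "42" -> "class-42-syntax-error-or-access-rule-violation", i.e. the SQLSTATE class
// prefix mapped to that section's HTML anchor.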

def getSqlState(sqlState: Option[String]): String = {
if (sqlState.isDefined) {
val prefix = sqlState.get.substring(0, 2)
if (sqlStates.contains(prefix)) {
s"[SQLSTATE: ${sqlState.get}](sql-error-conditions-sqlstates.html#${sqlStates(prefix)})"
} else {
"SQLSTATE: " + sqlState.get
}
} else {
"SQLSTATE: none assigned"
}
}
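// Illustration, reusing the assumed "42" entry above: getSqlState(Some("42601"))
// would render "[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-...)",
// while getSqlState(None) renders "SQLSTATE: none assigned".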

def getErrorPath(error: String): String = {
s"sql-error-conditions-${error.toLowerCase(Locale.ROOT).replaceAll("_", "-")}-error-class"
}
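// For example, getErrorPath("AS_OF_JOIN") returns
// "sql-error-conditions-as-of-join-error-class", the basename of the
// per-class Markdown document under docs/.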

def getHeader(title: String): String = {
s"""---
|layout: global
|title: $title
|displayTitle: $title
|license: |
| Licensed to the Apache Software Foundation (ASF) under one or more
| contributor license agreements. See the NOTICE file distributed with
| this work for additional information regarding copyright ownership.
| The ASF licenses this file to You under the Apache License, Version 2.0
| (the "License"); you may not use this file except in compliance with
| the License. You may obtain a copy of the License at
|
| http://www.apache.org/licenses/LICENSE-2.0
|
| Unless required by applicable law or agreed to in writing, software
| distributed under the License is distributed on an "AS IS" BASIS,
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
| See the License for the specific language governing permissions and
| limitations under the License.
|---""".stripMargin
}

val sqlErrorParentDocContent = errors.toSeq.filter(!_._1.startsWith("_LEGACY_ERROR_TEMP_"))
.sortBy(_._1).map(error => {
val name = error._1
val info = error._2
if (info.subClass.isDefined) {
val title = s"[$name](${getErrorPath(name)}.html)"
s"""|### $title
|
|${getSqlState(info.sqlState)}
|
|${quoteParameter(info.messageTemplate, name)}
|
|For more details see $title
|""".stripMargin
} else {
s"""|### $name
|
|${getSqlState(info.sqlState)}
|
|${quoteParameter(info.messageTemplate, name)}
|""".stripMargin
}
}).mkString("\n")
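// Illustration of one generated entry for a class with sub-classes (AS_OF_JOIN,
// which has no SQLSTATE assigned; see the doc diff further below):
//   ### [AS_OF_JOIN](sql-error-conditions-as-of-join-error-class.html)
//   SQLSTATE: none assigned
//   Invalid as-of join.
//   For more details see [AS_OF_JOIN](sql-error-conditions-as-of-join-error-class.html)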

val sqlErrorParentDoc =
s"""${getHeader("Error Conditions")}
|
|This is a list of common, named error conditions returned by Spark SQL.
|
|Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html).
|
|$sqlErrorParentDocContent
|""".stripMargin

errors.filter(_._2.subClass.isDefined).foreach(error => {
val name = error._1
val info = error._2

val subErrorContent = info.subClass.get.toSeq.sortBy(_._1).map(subError => {
s"""|## ${subError._1}
|
|${quoteParameter(subError._2.messageTemplate, s"$name.${subError._1}")}
|""".stripMargin
}).mkString("\n")

val subErrorDoc =
s"""${getHeader(name + " error class")}
|
|${getSqlState(info.sqlState)}
|
|${quoteParameter(info.messageTemplate, name)}
|
|This error class has the following derived error classes:
|
|$subErrorContent
|""".stripMargin

val errorDocPath = getWorkspaceFilePath("docs", getErrorPath(name) + ".md")
val errorsInDoc = if (errorDocPath.toFile.exists()) {
IOUtils.toString(errorDocPath.toUri, StandardCharsets.UTF_8)
} else {
""
}
if (regenerateGoldenFiles) {
if (subErrorDoc.trim != errorsInDoc.trim) {
logInfo(s"Regenerating sub error class document $errorDocPath")
if (errorDocPath.toFile.exists()) {
Files.delete(errorDocPath)
}
FileUtils.writeStringToFile(
errorDocPath.toFile,
subErrorDoc + lineSeparator,
StandardCharsets.UTF_8)
}
} else {
assert(subErrorDoc.trim == errorsInDoc.trim,
"The error class document is not up to date. Please regenerate it.")
}
})

val parentDocPath = getWorkspaceFilePath("docs", "sql-error-conditions.md")
val commonErrorsInDoc = if (parentDocPath.toFile.exists()) {
IOUtils.toString(parentDocPath.toUri, StandardCharsets.UTF_8)
} else {
""
}
if (regenerateGoldenFiles) {
if (sqlErrorParentDoc.trim != commonErrorsInDoc.trim) {
logInfo(s"Regenerating error class document $parentDocPath")
if (parentDocPath.toFile.exists()) {
Files.delete(parentDocPath)
}
FileUtils.writeStringToFile(
parentDocPath.toFile,
sqlErrorParentDoc + lineSeparator,
StandardCharsets.UTF_8)
}
} else {
assert(sqlErrorParentDoc.trim == commonErrorsInDoc.trim,
"The error class document is not up to date. Please regenerate it.")
}
}

@@ -19,7 +19,9 @@ license: |
limitations under the License.
---

SQLSTATE: none assigned

Invalid as-of join.

This error class has the following derived error classes:

@@ -30,3 +32,5 @@ The input argument `tolerance` must be non-negative.
## TOLERANCE_IS_UNFOLDABLE

The input argument `tolerance` must be a constant.


5 changes: 0 additions & 5 deletions docs/sql-error-conditions-datatype-mismatch-error-class.md
@@ -64,13 +64,11 @@ cannot cast `<srcType>` to `<targetType>`.
## CAST_WITH_CONF_SUGGESTION

cannot cast `<srcType>` to `<targetType>` with ANSI mode on.
If you have to cast `<srcType>` to `<targetType>`, you can set `<config>` as `<configVal>`.

## CAST_WITH_FUNC_SUGGESTION

cannot cast `<srcType>` to `<targetType>`.
To convert values from `<srcType>` to `<targetType>`, you can use the functions `<functionNames>` instead.

## CREATE_MAP_KEY_DIFF_TYPES
@@ -176,11 +174,8 @@ A range window frame cannot be used in an unordered window specification.
## SEQUENCE_WRONG_INPUT_TYPES

`<functionName>` uses the wrong parameter type. The parameter type must conform to:
1. The start and stop expressions must resolve to the same type.
2. If start and stop expressions resolve to the `<startType>` type, then the step expression must resolve to the `<stepType>` type.
3. Otherwise, if start and stop expressions resolve to the `<otherStartType>` type, then the step expression must resolve to the same type.

## SPECIFIED_WINDOW_FRAME_DIFF_TYPES
@@ -28,55 +28,36 @@ This error class has the following derived error classes:
## DATETIME_PATTERN_RECOGNITION

Spark >= 3.0:
Fail to recognize `<pattern>` pattern in the DateTimeFormatter. 1) You can set `<config>` to "LEGACY" to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from '`<docroot>`/sql-ref-datetime-pattern.html'.

## PARSE_DATETIME_BY_NEW_PARSER

Spark >= 3.0:
Fail to parse `<datetime>` in the new parser. You can set `<config>` to "LEGACY" to restore the behavior before Spark 3.0, or set to "CORRECTED" and treat it as an invalid datetime string.

## READ_ANCIENT_DATETIME

Spark >= 3.0:
reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z
from `<format>` files can be ambiguous, as the files may be written by
Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar
that is different from Spark 3.0+'s Proleptic Gregorian calendar.
See more details in SPARK-31404. You can set the SQL config `<config>` or
the datasource option `<option>` to "LEGACY" to rebase the datetime values
w.r.t. the calendar difference during reading. To read the datetime values
as it is, set the SQL config or the datasource option to "CORRECTED".

## WRITE_ANCIENT_DATETIME

Spark >= 3.0:
writing dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z
into `<format>` files can be dangerous, as the files may be read by Spark 2.x
or legacy versions of Hive later, which uses a legacy hybrid calendar that
is different from Spark 3.0+'s Proleptic Gregorian calendar. See more
details in SPARK-31404. You can set `<config>` to "LEGACY" to rebase the
datetime values w.r.t. the calendar difference during writing, to get maximum
interoperability. Or set the config to "CORRECTED" to write the datetime
values as it is, if you are sure that the written files will only be read by
Spark 3.0+ or other systems that use Proleptic Gregorian calendar.


@@ -36,3 +36,5 @@ Data columns: `<dataColumns>`.
too many data columns:
Table columns: `<tableColumns>`.
Data columns: `<dataColumns>`.


@@ -19,6 +19,8 @@ license: |
limitations under the License.
---

SQLSTATE: none assigned

Can't find table property:

This error class has the following derived error classes:
@@ -30,3 +32,5 @@ This error class has the following derived error classes:
## MISSING_KEY_PART

`<key>`, `<totalAmountOfParts>` parts are expected.


@@ -19,6 +19,8 @@ license: |
limitations under the License.
---

SQLSTATE: none assigned

The boundary `<boundary>` is invalid: `<invalidValue>`.

This error class has the following derived error classes:
@@ -30,3 +32,5 @@ Expected the value is '0', '`<longMaxValue>`', '[`<intMinValue>`, `<intMaxValue>`]'.
## START

Expected the value is '0', '`<longMinValue>`', '[`<intMinValue>`, `<intMaxValue>`]'.

