From d4277b8e78347fd4e3163c6218edc4675ebb6db2 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Fri, 7 Jul 2023 11:58:23 +0900 Subject: [PATCH] [SPARK-44268][CORE][TEST][FOLLOWUP] Add test to generate `sql-error-conditions` doc automatic ### What changes were proposed in this pull request? This is a follow-up PR for #41813 that changes the test to automatically generate the `sql-error-conditions` docs. Like other GOLDEN_FILES tests, it reports an error during the test run if the docs do not match. ### Why are the changes needed? 1. To make sure `error-classes.json` stays in sync with the docs. 2. To make it easier for developers to keep the docs in sync. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test. Closes #41865 from Hisoka-X/SPARK-44268_error_json_follow_up. Authored-by: Jia Fan Signed-off-by: Hyukjin Kwon --- .../main/resources/error/error-classes.json | 2 +- .../apache/spark/SparkThrowableSuite.scala | 219 ++++++++++++---- ...rror-conditions-as-of-join-error-class.md} | 6 +- ...onditions-datatype-mismatch-error-class.md | 5 - ...tent-behavior-cross-version-error-class.md | 19 -- ...sert-column-arity-mismatch-error-class.md} | 2 + ...nsufficient-table-property-error-class.md} | 4 + ...onditions-invalid-boundary-error-class.md} | 4 + ...ions-invalid-default-value-error-class.md} | 4 + ...r-conditions-invalid-format-error-class.md | 2 +- ...tions-invalid-inline-table-error-class.md} | 4 + ...valid-lambda-function-call-error-class.md} | 8 +- ...alid-limit-like-expression-error-class.md} | 4 + ...ns-invalid-observed-metrics-error-class.md | 2 + ...nvalid-partition-operation-error-class.md} | 4 + ...r-conditions-invalid-schema-error-class.md | 1 - ...ditions-invalid-sql-syntax-error-class.md} | 58 +++-- ...time-travel-timestamp-expr-error-class.md} | 4 + ...invalid-write-distribution-error-class.md} | 4 + ...alformed-record-in-parsing-error-class.md} | 3 + ...ditions-missing-attributes-error-class.md} | 3 + ...tions-not-a-constant-string-error-class.md | 2 + 
...itions-not-allowed-in-from-error-class.md} | 4 + ...-supported-in-jdbc-catalog-error-class.md} | 2 + ...tions-unsupported-add-file-error-class.md} | 2 + ...-unsupported-default-value-error-class.md} | 6 +- ...ditions-unsupported-feature-error-class.md | 10 +- ...ditions-unsupported-insert-error-class.md} | 2 + ...nsupported-merge-condition-error-class.md} | 2 + ...ions-unsupported-overwrite-error-class.md} | 2 + docs/sql-error-conditions.md | 239 +++++++++--------- 31 files changed, 406 insertions(+), 227 deletions(-) rename docs/{sql-error-conditions-invalid-as-of-join.md => sql-error-conditions-as-of-join-error-class.md} (95%) rename docs/{sql-error-conditions-insert-column-arity-mismatch.md => sql-error-conditions-insert-column-arity-mismatch-error-class.md} (99%) rename docs/{sql-error-conditions-insufficient-table-property.md => sql-error-conditions-insufficient-table-property-error-class.md} (97%) rename docs/{sql-error-conditions-invalid-boundary.md => sql-error-conditions-invalid-boundary-error-class.md} (97%) rename docs/{sql-error-conditions-invalid-default-value.md => sql-error-conditions-invalid-default-value-error-class.md} (98%) rename docs/{sql-error-conditions-invalid-inline-table.md => sql-error-conditions-invalid-inline-table-error-class.md} (98%) rename docs/{sql-error-conditions-invalid-lamdba-function-call.md => sql-error-conditions-invalid-lambda-function-call-error-class.md} (82%) rename docs/{sql-error-conditions-invalid-limit-like-expression.md => sql-error-conditions-invalid-limit-like-expression-error-class.md} (98%) rename docs/{sql-error-conditions-invalid-partition-operation.md => sql-error-conditions-invalid-partition-operation-error-class.md} (97%) rename docs/{sql-error-conditions-invalid-sql-syntax.md => sql-error-conditions-invalid-sql-syntax-error-class.md} (95%) rename docs/{sql-error-conditions-invalid-time-travel-timestamp-expr.md => sql-error-conditions-invalid-time-travel-timestamp-expr-error-class.md} (97%) rename 
docs/{sql-error-conditions-invalid-write-distribution.md => sql-error-conditions-invalid-write-distribution-error-class.md} (98%) rename docs/{sql-error-conditions-malformed-record-in-parsing.md => sql-error-conditions-malformed-record-in-parsing-error-class.md} (99%) rename docs/{sql-error-conditions-missing-attributes.md => sql-error-conditions-missing-attributes-error-class.md} (99%) rename docs/{sql-error-conditions-not-allowed-in-from.md => sql-error-conditions-not-allowed-in-from-error-class.md} (97%) rename docs/{sql-error-conditions-not-supported-in-jdbc-catalog.md => sql-error-conditions-not-supported-in-jdbc-catalog-error-class.md} (99%) rename docs/{sql-error-conditions-unsupported-add-file.md => sql-error-conditions-unsupported-add-file-error-class.md} (99%) rename docs/{sql-error-conditions-unsupported-default-value.md => sql-error-conditions-unsupported-default-value-error-class.md} (93%) rename docs/{sql-error-conditions-unsupported-insert.md => sql-error-conditions-unsupported-insert-error-class.md} (99%) rename docs/{sql-error-conditions-unsupported-merge-condition.md => sql-error-conditions-unsupported-merge-condition-error-class.md} (99%) rename docs/{sql-error-conditions-unsupported-overwrite.md => sql-error-conditions-unsupported-overwrite-error-class.md} (99%) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 55fa2878e375a..0afd103b5655a 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1214,7 +1214,7 @@ }, "UNEXPECTED_TOKEN" : { "message" : [ - "Found the unexpected in the format string; the structure of the format string must match: [MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]." + "Found the unexpected in the format string; the structure of the format string must match: `[MI|S]` `[$]` `[0|9|G|,]*` `[.|D]` `[0|9]*` `[$]` `[PR|MI|S]`." 
] }, "WRONG_NUM_DIGIT" : { diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 034a782e53331..0249cde54884b 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -20,8 +20,10 @@ package org.apache.spark import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files +import java.util.Locale import scala.util.Properties.lineSeparator +import scala.util.matching.Regex import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION @@ -45,6 +47,12 @@ class SparkThrowableSuite extends SparkFunSuite { SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ "core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\"" }}} + + To regenerate the error class document. Run: + {{{ + SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ + "core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\"" + }}} */ private val errorJsonFilePath = getWorkspaceFilePath( "common", "utils", "src", "main", "resources", "error", "error-classes.json") @@ -141,54 +149,179 @@ class SparkThrowableSuite extends SparkFunSuite { checkIfUnique(messageFormats) } - test("SPARK-44268: Error classes match with document") { - val sqlstateDoc = "sql-error-conditions-sqlstates.md" + test("Error classes match with document") { val errors = errorReader.errorInfoMap - val errorDocPaths = getWorkspaceFilePath("docs").toFile - .listFiles(_.getName.startsWith("sql-error-conditions-")) - .filter(!_.getName.equals(sqlstateDoc)) - .map(f => IOUtils.toString(f.toURI, StandardCharsets.UTF_8)).map(_.split("\n")) - // check the error classes in document should be in error-classes.json - val linkInDocRegex = "\\[(.*)\\]\\((.*)\\)".r - val commonErrorsInDoc = IOUtils.toString(getWorkspaceFilePath("docs", - 
"sql-error-conditions.md").toUri, StandardCharsets.UTF_8).split("\n") - .filter(_.startsWith("###")).map(s => s.replace("###", "").trim) - .filter(linkInDocRegex.findFirstMatchIn(_).isEmpty) - - commonErrorsInDoc.foreach(s => assert(errors.contains(s), - s"Error class: $s is not in error-classes.json")) - - val titlePrefix = "title:" - val errorsInDoc = errorDocPaths.map(lines => { - val errorClass = lines.filter(_.startsWith(titlePrefix)) - .map(s => s.replace("error class", "").replace(titlePrefix, "").trim).head - assert(errors.contains(errorClass), s"Error class: $errorClass is not in error-classes.json") - val subClasses = lines.filter(_.startsWith("##")).map(s => s.replace("##", "").trim) - .map { s => - assert(errors(errorClass).subClass.get.contains(s), - s"Error class: $errorClass does not contain sub class: $s in error-classes.json") - s - } - errorClass -> subClasses + + // the black list of error class name which should not add quote + val contentQuoteBlackList = Seq( + "INCOMPLETE_TYPE_DEFINITION.MAP", + "INCOMPLETE_TYPE_DEFINITION.STRUCT") + + def quoteParameter(content: String, errorName: String): String = { + if (contentQuoteBlackList.contains(errorName)) { + content + } else { + "<(.*?)>".r.replaceAllIn(content, (m: Regex.Match) => { + val matchStr = m.group(1) + if (matchStr.nonEmpty) { + s"`<$matchStr>`" + } else { + m.matched + } + }).replaceAll("%(.*?)\\$", "`\\%$1\\$`") + } + } + + val sqlStates = IOUtils.toString(getWorkspaceFilePath("docs", + "sql-error-conditions-sqlstates.md").toUri, StandardCharsets.UTF_8).split("\n") + .filter(_.startsWith("##")).map(s => { + + val errorHeader = s.split("[`|:|#|\\s]+").filter(_.nonEmpty) + val sqlState = errorHeader(1) + (sqlState, errorHeader.head.toLowerCase(Locale.ROOT) + "-" + sqlState + "-" + + errorHeader.takeRight(errorHeader.length - 2).mkString("-").toLowerCase(Locale.ROOT)) }).toMap - // check the error classes in error-classes.json should be in document - errors.foreach { e => - val errorClass 
= e._1 - val subClasses = e._2.subClass.getOrElse(Map.empty).keys.toSeq - if (subClasses.nonEmpty) { - assert(errorsInDoc.contains(errorClass), - s"Error class: $errorClass do not have sql-error-conditions sub doc, please create it") - val subClassesInDoc = errorsInDoc(errorClass) - subClasses.foreach { s => - assert(subClassesInDoc.contains(s), - s"Error class: $errorClass contains sub class: $s which is not in " + - s"sql-error-conditions sub doc") + def getSqlState(sqlState: Option[String]): String = { + if (sqlState.isDefined) { + val prefix = sqlState.get.substring(0, 2) + if (sqlStates.contains(prefix)) { + s"[SQLSTATE: ${sqlState.get}](sql-error-conditions-sqlstates.html#${sqlStates(prefix)})" + } else { + "SQLSTATE: " + sqlState.get + } + } else { + "SQLSTATE: none assigned" + } + } + + def getErrorPath(error: String): String = { + s"sql-error-conditions-${error.toLowerCase(Locale.ROOT).replaceAll("_", "-")}-error-class" + } + + def getHeader(title: String): String = { + s"""--- + |layout: global + |title: $title + |displayTitle: $title + |license: | + | Licensed to the Apache Software Foundation (ASF) under one or more + | contributor license agreements. See the NOTICE file distributed with + | this work for additional information regarding copyright ownership. + | The ASF licenses this file to You under the Apache License, Version 2.0 + | (the "License"); you may not use this file except in compliance with + | the License. You may obtain a copy of the License at + | + | http://www.apache.org/licenses/LICENSE-2.0 + | + | Unless required by applicable law or agreed to in writing, software + | distributed under the License is distributed on an "AS IS" BASIS, + | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + | See the License for the specific language governing permissions and + | limitations under the License. 
+ |---""".stripMargin + } + + val sqlErrorParentDocContent = errors.toSeq.filter(!_._1.startsWith("_LEGACY_ERROR_TEMP_")) + .sortBy(_._1).map(error => { + val name = error._1 + val info = error._2 + if (info.subClass.isDefined) { + val title = s"[$name](${getErrorPath(name)}.html)" + s"""|### $title + | + |${getSqlState(info.sqlState)} + | + |${quoteParameter(info.messageTemplate, name)} + | + |For more details see $title + |""".stripMargin + } else { + s"""|### $name + | + |${getSqlState(info.sqlState)} + | + |${quoteParameter(info.messageTemplate, name)} + |""".stripMargin + } + }).mkString("\n") + + val sqlErrorParentDoc = + s"""${getHeader("Error Conditions")} + | + |This is a list of common, named error conditions returned by Spark SQL. + | + |Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html). + | + |$sqlErrorParentDocContent + |""".stripMargin + + errors.filter(_._2.subClass.isDefined).foreach(error => { + val name = error._1 + val info = error._2 + + val subErrorContent = info.subClass.get.toSeq.sortBy(_._1).map(subError => { + s"""|## ${subError._1} + | + |${quoteParameter(subError._2.messageTemplate, s"$name.${subError._1}")} + |""".stripMargin + }).mkString("\n") + + val subErrorDoc = + s"""${getHeader(name + " error class")} + | + |${getSqlState(info.sqlState)} + | + |${quoteParameter(info.messageTemplate, name)} + | + |This error class has the following derived error classes: + | + |$subErrorContent + |""".stripMargin + + val errorDocPath = getWorkspaceFilePath("docs", getErrorPath(name) + ".md") + val errorsInDoc = if (errorDocPath.toFile.exists()) { + IOUtils.toString(errorDocPath.toUri, StandardCharsets.UTF_8) + } else { + "" + } + if (regenerateGoldenFiles) { + if (subErrorDoc.trim != errorsInDoc.trim) { + logInfo(s"Regenerating sub error class document $errorDocPath") + if (errorDocPath.toFile.exists()) { + Files.delete(errorDocPath) + } + FileUtils.writeStringToFile( + errorDocPath.toFile, + subErrorDoc + lineSeparator, + 
StandardCharsets.UTF_8) } - } else if (!errorClass.startsWith("_LEGACY_ERROR_TEMP_")) { - assert(commonErrorsInDoc.contains(errorClass), - s"Error class: $errorClass is not in sql-error-conditions.md") + } else { + assert(subErrorDoc.trim == errorsInDoc.trim, + "The error class document is not up to date. Please regenerate it.") } + }) + + val parentDocPath = getWorkspaceFilePath("docs", "sql-error-conditions.md") + val commonErrorsInDoc = if (parentDocPath.toFile.exists()) { + IOUtils.toString(parentDocPath.toUri, StandardCharsets.UTF_8) + } else { + "" + } + if (regenerateGoldenFiles) { + if (sqlErrorParentDoc.trim != commonErrorsInDoc.trim) { + logInfo(s"Regenerating error class document $parentDocPath") + if (parentDocPath.toFile.exists()) { + Files.delete(parentDocPath) + } + FileUtils.writeStringToFile( + parentDocPath.toFile, + sqlErrorParentDoc + lineSeparator, + StandardCharsets.UTF_8) + } + } else { + assert(sqlErrorParentDoc.trim == commonErrorsInDoc.trim, + "The error class document is not up to date. Please regenerate it.") } } diff --git a/docs/sql-error-conditions-invalid-as-of-join.md b/docs/sql-error-conditions-as-of-join-error-class.md similarity index 95% rename from docs/sql-error-conditions-invalid-as-of-join.md rename to docs/sql-error-conditions-as-of-join-error-class.md index 39f190474e6db..921c720d39ad9 100644 --- a/docs/sql-error-conditions-invalid-as-of-join.md +++ b/docs/sql-error-conditions-as-of-join-error-class.md @@ -19,7 +19,9 @@ license: | limitations under the License. --- -Invalid as-of join +SQLSTATE: none assigned + +Invalid as-of join. This error class has the following derived error classes: @@ -30,3 +32,5 @@ The input argument `tolerance` must be non-negative. ## TOLERANCE_IS_UNFOLDABLE The input argument `tolerance` must be a constant. 
+ + diff --git a/docs/sql-error-conditions-datatype-mismatch-error-class.md b/docs/sql-error-conditions-datatype-mismatch-error-class.md index 6ec4f07895029..3bd63925323b4 100644 --- a/docs/sql-error-conditions-datatype-mismatch-error-class.md +++ b/docs/sql-error-conditions-datatype-mismatch-error-class.md @@ -64,13 +64,11 @@ cannot cast `` to ``. ## CAST_WITH_CONF_SUGGESTION cannot cast `` to `` with ANSI mode on. - If you have to cast `` to ``, you can set `` as ``. ## CAST_WITH_FUNC_SUGGESTION cannot cast `` to ``. - To convert values from `` to ``, you can use the functions `` instead. ## CREATE_MAP_KEY_DIFF_TYPES @@ -176,11 +174,8 @@ A range window frame cannot be used in an unordered window specification. ## SEQUENCE_WRONG_INPUT_TYPES `` uses the wrong parameter type. The parameter type must conform to: - 1. The start and stop expressions must resolve to the same type. - 2. If start and stop expressions resolve to the `` type, then the step expression must resolve to the `` type. - 3. Otherwise, if start and stop expressions resolve to the `` type, then the step expression must resolve to the same type. ## SPECIFIED_WINDOW_FRAME_DIFF_TYPES diff --git a/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md b/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md index 6c80b3846cfe0..7a8dfb525fbfa 100644 --- a/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md +++ b/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md @@ -28,55 +28,36 @@ This error class has the following derived error classes: ## DATETIME_PATTERN_RECOGNITION Spark >= 3.0: - Fail to recognize `` pattern in the DateTimeFormatter. 1) You can set `` to "LEGACY" to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from '``/sql-ref-datetime-pattern.html'. ## PARSE_DATETIME_BY_NEW_PARSER Spark >= 3.0: - Fail to parse `` in the new parser. 
You can set `` to "LEGACY" to restore the behavior before Spark 3.0, or set to "CORRECTED" and treat it as an invalid datetime string. ## READ_ANCIENT_DATETIME Spark >= 3.0: - reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z - from `` files can be ambiguous, as the files may be written by - Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar - that is different from Spark 3.0+'s Proleptic Gregorian calendar. - See more details in SPARK-31404. You can set the SQL config `` or - the datasource option `