diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index a89f0ea267279..c6149ce35a430 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -2008,12 +2008,6 @@
     },
     "sqlState" : "HY000"
   },
-  "INVALID_HIVE_COLUMN_NAME" : {
-    "message" : [
-      "Cannot create the table <tableName> having the column <columnName> whose name contains invalid characters <invalidChars> in Hive metastore."
-    ],
-    "sqlState" : "42K05"
-  },
   "INVALID_IDENTIFIER" : {
     "message" : [
       "The identifier <ident> is invalid. Please, consider quoting it with back-quotes as `<ident>`."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index ebf7436f96e7e..e458cd5a337b5 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1190,12 +1190,6 @@ The handle `<handle>` is invalid.
 
 For more details see [INVALID_HANDLE](sql-error-conditions-invalid-handle-error-class.html)
 
-### INVALID_HIVE_COLUMN_NAME
-
-[SQLSTATE: 42K05](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
-
-Cannot create the table `<tableName>` having the column `<columnName>` whose name contains invalid characters `<invalidChars>` in Hive metastore.
-
 ### INVALID_IDENTIFIER
 
 [SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
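Reviewer note (not part of the patch): the `<...>` placeholders in error-classes.json are filled from the `messageParameters` map when an error is raised. A minimal sketch of how the removed `INVALID_HIVE_COLUMN_NAME` template used to render; `render` is a stand-in helper, not Spark's real `ErrorClassesJsonReader`, and the parameter values are copied from the tests this patch deletes:

```scala
object RenderRemovedError extends App {
  // Template copied from the removed error-classes.json entry.
  val template = "Cannot create the table <tableName> having the column <columnName> " +
    "whose name contains invalid characters <invalidChars> in Hive metastore."
  // Values mirror the Map built by the (now deleted) verifyDataSchema.
  val params = Map(
    "tableName" -> "`spark_catalog`.`default`.`t`",
    "columnName" -> "`nested,column`",
    "invalidChars" -> "','")
  // Stand-in renderer: substitutes each <key> with its parameter value.
  def render(tpl: String, ps: Map[String, String]): String =
    ps.foldLeft(tpl) { case (msg, (k, v)) => msg.replace(s"<$k>", v) }
  println(render(template, params))
}
```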
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 84f75e3ef5036..32fc8a452106c 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -42,13 +42,12 @@ import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils._
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.types.DataTypeUtils
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
-import org.apache.spark.sql.catalyst.util.TypeUtils.{toSQLId, toSQLValue}
 import org.apache.spark.sql.execution.command.DDLUtils
 import org.apache.spark.sql.execution.datasources.{PartitioningUtils, SourceOptions}
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.HiveSerDe
 import org.apache.spark.sql.internal.StaticSQLConf._
-import org.apache.spark.sql.types.{AnsiIntervalType, ArrayType, DataType, MapType, StructType, TimestampNTZType}
+import org.apache.spark.sql.types._
 
 /**
  * A persistent implementation of the system catalog using Hive.
@@ -150,51 +149,6 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     }
   }
 
-  /**
-   * Checks the validity of data column names. Hive metastore disallows the table to use some
-   * special characters (',', ':', and ';') in data column names, including nested column names.
-   * Partition columns do not have such a restriction. Views do not have such a restriction.
-   */
-  private def verifyDataSchema(
-      tableName: TableIdentifier, tableType: CatalogTableType, dataSchema: StructType): Unit = {
-    if (tableType != VIEW) {
-      val invalidChars = Seq(",", ":", ";")
-      def verifyNestedColumnNames(schema: StructType): Unit = schema.foreach { f =>
-        f.dataType match {
-          case st: StructType => verifyNestedColumnNames(st)
-          case _ if invalidChars.exists(f.name.contains) =>
-            val invalidCharsString = invalidChars.map(c => s"'$c'").mkString(", ")
-            throw new AnalysisException(
-              errorClass = "INVALID_HIVE_COLUMN_NAME",
-              messageParameters = Map(
-                "invalidChars" -> invalidCharsString,
-                "tableName" -> toSQLId(tableName.nameParts),
-                "columnName" -> toSQLId(f.name)
-              ))
-          case _ =>
-        }
-      }
-
-      dataSchema.foreach { f =>
-        f.dataType match {
-          // Checks top-level column names
-          case _ if f.name.contains(",") =>
-            throw new AnalysisException(
-              errorClass = "INVALID_HIVE_COLUMN_NAME",
-              messageParameters = Map(
-                "invalidChars" -> toSQLValue(","),
-                "tableName" -> toSQLId(tableName.nameParts),
-                "columnName" -> toSQLId(f.name)
-              ))
-          // Checks nested column names
-          case st: StructType =>
-            verifyNestedColumnNames(st)
-          case _ =>
-        }
-      }
-    }
-  }
-
   // --------------------------------------------------------------------------
   // Databases
   // --------------------------------------------------------------------------
@@ -260,8 +214,6 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
     val table = tableDefinition.identifier.table
     requireDbExists(db)
     verifyTableProperties(tableDefinition)
-    verifyDataSchema(
-      tableDefinition.identifier, tableDefinition.tableType, tableDefinition.dataSchema)
 
     if (tableExists(db, table) && !ignoreIfExists) {
       throw new TableAlreadyExistsException(db = db, table = table)
@@ -711,7 +663,6 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       newDataSchema: StructType): Unit = withClient {
     requireTableExists(db, table)
     val oldTable = getTable(db, table)
-    verifyDataSchema(oldTable.identifier, oldTable.tableType, newDataSchema)
     val schemaProps = tableMetaToTableProps(oldTable,
       StructType(newDataSchema ++ oldTable.partitionSchema)).toMap
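Reviewer note (not part of the patch): with `verifyDataSchema` gone, creating a Hive-format table whose top-level column name contains a comma succeeds instead of raising `INVALID_HIVE_COLUMN_NAME`; Spark reads such tables back through the case-sensitive schema it records in table properties rather than through the Hive column definitions alone. A minimal sketch of the relaxed behavior, assuming a Hive-enabled build; the session setup and table name are illustrative, mirroring the new test further down:

```scala
import org.apache.spark.sql.SparkSession

object CommaColumnDemo extends App {
  // Assumes a Spark build with Hive support; local session for illustration.
  val spark = SparkSession.builder()
    .master("local[1]")
    .enableHiveSupport()
    .getOrCreate()

  // Previously rejected by verifyDataSchema with INVALID_HIVE_COLUMN_NAME;
  // after this patch the CREATE succeeds and the column round-trips.
  spark.sql("CREATE TABLE demo STORED AS parquet SELECT id AS `a,b` FROM range(1)")
  spark.sql("SELECT `a,b` FROM demo").show()
  spark.stop()
}
```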
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 15a7796a72b5d..7b0e0b28780ca 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -34,10 +34,10 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException}
 import org.apache.spark.sql.connector.catalog.CatalogManager
 import org.apache.spark.sql.connector.catalog.CatalogManager.SESSION_CATALOG_NAME
 import org.apache.spark.sql.connector.catalog.SupportsNamespaces.PROP_OWNER
+import org.apache.spark.sql.errors.DataTypeErrors.toSQLType
 import org.apache.spark.sql.execution.command.{DDLSuite, DDLUtils}
 import org.apache.spark.sql.execution.datasources.orc.OrcCompressionCodec
 import org.apache.spark.sql.execution.datasources.parquet.{ParquetCompressionCodec, ParquetFooterReader}
-import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
 import org.apache.spark.sql.hive.HiveUtils.{CONVERT_METASTORE_ORC, CONVERT_METASTORE_PARQUET}
 import org.apache.spark.sql.hive.orc.OrcFileOperator
@@ -2876,22 +2876,39 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-24681 checks if nested column names do not include ',', ':', and ';'") {
-    Seq("nested,column", "nested:column", "nested;column").foreach { nestedColumnName =>
+  test("SPARK-47101 checks if nested column names do not include invalid characters") {
+    // delimiter characters
+    Seq(",", ":").foreach { c =>
+      val typ = s"array<struct<`abc${c}xyz`:int>>"
+      // The regex is from HiveClientImpl.getSparkSQLDataType, please keep them in sync.
+      val replaced = typ.replaceAll("`", "").replaceAll("(?<=struct<|,)([^,<:]+)(?=:)", "`$1`")
+      withTable("t") {
+        checkError(
+          exception = intercept[SparkException] {
+            sql(s"CREATE TABLE t (a $typ) USING hive")
+          },
+          errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE",
+          parameters = Map(
+            "fieldType" -> toSQLType(replaced),
+            "fieldName" -> "`a`")
+        )
+      }
+    }
+    // other special characters
+    Seq(";", "^", "\\", "/", "%").foreach { c =>
+      val typ = s"array<struct<`abc${c}xyz`:int>>"
+      val replaced = typ.replaceAll("`", "")
+      val msg = s"java.lang.IllegalArgumentException: Error: : expected at the position " +
+        s"16 of '$replaced' but '$c' is found."
       withTable("t") {
         checkError(
           exception = intercept[AnalysisException] {
-            spark.range(1)
-              .select(struct(lit(0).as(nestedColumnName)).as("toplevel"))
-              .write
-              .format("hive")
-              .saveAsTable("t")
+            sql(s"CREATE TABLE t (a $typ) USING hive")
           },
-          errorClass = "INVALID_HIVE_COLUMN_NAME",
+          errorClass = "_LEGACY_ERROR_TEMP_3065",
           parameters = Map(
-            "invalidChars" -> "',', ':', ';'",
-            "tableName" -> "`spark_catalog`.`default`.`t`",
-            "columnName" -> s"`$nestedColumnName`")
+            "clazz" -> "org.apache.hadoop.hive.ql.metadata.HiveException",
+            "msg" -> msg)
         )
       }
     }
@@ -3385,24 +3402,11 @@ class HiveDDLSuite
     }
   }
 
-  test("SPARK-44911: Create the table with invalid column") {
+  test("SPARK-47101: comma is allowed in column name") {
     val tbl = "t1"
     withTable(tbl) {
-      val e = intercept[AnalysisException] {
-        sql(
-          s"""
-             |CREATE TABLE t1
-             |STORED AS parquet
-             |SELECT id, DATE'2018-01-01' + MAKE_DT_INTERVAL(0, id) FROM RANGE(0, 10)
-           """.stripMargin)
-      }
-      checkError(e,
-        errorClass = "INVALID_HIVE_COLUMN_NAME",
-        parameters = Map(
-          "invalidChars" -> "','",
-          "tableName" -> "`spark_catalog`.`default`.`t1`",
-          "columnName" -> "`DATE '2018-01-01' + make_dt_interval(0, id, 0, 0`.`000000)`")
-      )
+      sql("CREATE TABLE t1 STORED AS parquet SELECT id as `a,b` FROM range(1)")
+      checkAnswer(sql("SELECT * FROM t1"), Row(0))
     }
   }
 }
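Reviewer note (not part of the patch): the quoting regex the new test borrows from `HiveClientImpl.getSparkSQLDataType` back-quotes any field name sitting between `struct<` (or a comma) and a colon, which is how a delimiter-bearing name survives the round trip through Hive's type string. A standalone sketch with an example input:

```scala
object QuoteFieldNamesDemo extends App {
  // Regex as quoted in the test above: back-quote a run of non-delimiter
  // characters that follows "struct<" or "," and precedes ":".
  val typ = "array<struct<abc,xyz:int>>"
  val quoted = typ.replaceAll("(?<=struct<|,)([^,<:]+)(?=:)", "`$1`")
  println(quoted) // prints: array<struct<abc,`xyz`:int>>
}
```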
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
index 1a0c2f6165a47..0bcac639443cd 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala
@@ -2141,41 +2141,6 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi
     }
   }
 
-  test("Auto alias construction of get_json_object") {
-    val df = Seq(("1", """{"f1": "value1", "f5": 5.23}""")).toDF("key", "jstring")
-
-    withTable("t") {
-      val e = intercept[AnalysisException] {
-        df.select($"key", functions.get_json_object($"jstring", "$.f1"))
-          .write.format("hive").saveAsTable("t")
-      }
-      checkError(e,
-        errorClass = "INVALID_HIVE_COLUMN_NAME",
-        parameters = Map(
-          "invalidChars" -> "','",
-          "tableName" -> "`spark_catalog`.`default`.`t`",
-          "columnName" -> "`get_json_object(jstring, $`.`f1)`")
-      )
-    }
-
-    withTempView("tempView") {
-      withTable("t") {
-        df.createTempView("tempView")
-        val e = intercept[AnalysisException] {
-          sql("CREATE TABLE t USING hive AS " +
-            "SELECT key, get_json_object(jstring, '$.f1') FROM tempView")
-        }
-        checkError(e,
-          errorClass = "INVALID_HIVE_COLUMN_NAME",
-          parameters = Map(
-            "invalidChars" -> "','",
-            "tableName" -> "`spark_catalog`.`default`.`t`",
-            "columnName" -> "`get_json_object(jstring, $`.`f1)`")
-        )
-      }
-    }
-  }
-
   test("SPARK-19912 String literals should be escaped for Hive metastore partition pruning") {
     withTable("spark_19912") {
       Seq(