From c7d63eac48d3a81099456360f1c30e6049824749 Mon Sep 17 00:00:00 2001 From: kylerong-db Date: Fri, 18 Aug 2023 11:07:21 -0700 Subject: [PATCH] [SPARK-44873] Support alter view with nested columns in Hive client ### What changes were proposed in this pull request? Previously, if a view's schema contains a nested struct, alterTable using Hive client will fail. This change supports a view with a nested struct. The mechanism is to store an empty schema when we call Hive client, since we already store the actual schema in table properties. This fix is similar to https://github.com/apache/spark/pull/37364 ### Why are the changes needed? This supports using view with nested structs in Hive metastore. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit test. Closes #42532 from kylerong-db/hive_view. Authored-by: kylerong-db Signed-off-by: Gengliang Wang --- .../apache/spark/sql/hive/HiveExternalCatalog.scala | 11 ++++++++++- .../spark/sql/hive/HiveParquetSourceSuite.scala | 10 ++++++++++ .../spark/sql/hive/execution/HiveDDLSuite.scala | 8 -------- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index b1eadea42e077..67b780f13c431 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -595,7 +595,16 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat if (tableDefinition.tableType == VIEW) { val newTableProps = tableDefinition.properties ++ tableMetaToTableProps(tableDefinition).toMap - client.alterTable(tableDefinition.copy(properties = newTableProps)) + val newTable = tableDefinition.copy(properties = newTableProps) + try { + client.alterTable(newTable) + } catch { + case NonFatal(e) => + // If for some reason we 
fail to store the schema, we store it as empty instead, + // since we already store the real schema in the table properties. This try-catch + // should only be necessary for Spark views which are incompatible with Hive + client.alterTable(newTable.copy(schema = EMPTY_DATA_SCHEMA)) + } } else { val oldTableDef = getRawTable(db, tableDefinition.identifier.table) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala index 7c67f34560e29..45668fc683d9c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveParquetSourceSuite.scala @@ -385,4 +385,14 @@ class HiveParquetSourceSuite extends ParquetPartitioningTest with ParquetTest { checkAnswer(spark.table("t"), Row(Row("a", 1))) } } + + test("Alter view with nested struct") { + withView("t", "t2") { + sql("CREATE OR REPLACE VIEW t AS SELECT " + + "struct(id AS `$col2`, struct(id AS `$col`) AS s1) AS s2 FROM RANGE(5)") + sql("ALTER VIEW t SET TBLPROPERTIES ('x' = 'y')") + sql("ALTER VIEW t RENAME TO t2") + checkAnswer(sql("show TBLPROPERTIES t2 (x)"), Row("x", "y")) + } + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 1c46b55870805..201ba5ea6a1da 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -175,14 +175,6 @@ class HiveCatalogedDDLSuite extends DDLSuite with TestHiveSingleton with BeforeA withView("v") { spark.sql("CREATE VIEW v AS SELECT STRUCT('a' AS `a`, 1 AS b) q") checkAnswer(sql("SELECT q.`a`, q.b FROM v"), Row("a", 1) :: Nil) - - checkError( - exception = intercept[SparkException] { - spark.sql("ALTER VIEW v AS SELECT STRUCT('a' AS `$a`, 1 AS 
b) q") - }, - errorClass = "CANNOT_RECOGNIZE_HIVE_TYPE", - parameters = Map("fieldType" -> "\"STRUCT<$A:STRING,B:INT>\"", "fieldName" -> "`q`") - ) } }