apache · MaxGekk · Jan 10, 2021 · Jan 10, 2021 · Jan 10, 2021 · MaxGekk
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -510,6 +510,30 @@ object CatalogTable {
     propKey == originalKey || propKey == s"$originalKey.numParts" ||
       propKey.startsWith(s"$originalKey.part.")
   }
+
+  def normalize(table: CatalogTable): CatalogTable = {
+    val nondeterministicProps = Set(
+      "CreateTime",
+      "transient_lastDdlTime",
+      "grantTime",
+      "lastUpdateTime",
+      "last_modified_by",
+      "last_modified_time",
+      "Owner:",
+      // The following are hive specific schema parameters which we do not need to match exactly.
+      "totalNumberFiles",
+      "maxFileSize",
+      "minFileSize"
+    )
+
+    table.copy(
+      createTime = 0L,
+      lastAccessTime = 0L,
+      properties = table.properties.filterKeys(!nondeterministicProps.contains(_)).toMap,
+      stats = None,
+      ignoredProperties = Map.empty
+    )
+  }
 }
 
 /**
@@ -781,10 +805,7 @@ case class HiveTableRelation(
   def isPartitioned: Boolean = partitionCols.nonEmpty
 
   override def doCanonicalize(): HiveTableRelation = copy(
-    tableMeta = tableMeta.copy(
-      storage = CatalogStorageFormat.empty,
-      createTime = -1
-    ),
+    tableMeta = CatalogTable.normalize(tableMeta),
     dataCols = dataCols.zipWithIndex.map {
       case (attr, index) => attr.withExprId(ExprId(index))
     },

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ShowCreateTableSuite.scala
@@ -223,29 +223,6 @@ abstract class ShowCreateTableSuite extends QueryTest with SQLTestUtils {
   }
 
   protected def checkCatalogTables(expected: CatalogTable, actual: CatalogTable): Unit = {
-    def normalize(table: CatalogTable): CatalogTable = {
-      val nondeterministicProps = Set(
-        "CreateTime",
-        "transient_lastDdlTime",
-        "grantTime",
-        "lastUpdateTime",
-        "last_modified_by",
-        "last_modified_time",
-        "Owner:",
-        // The following are hive specific schema parameters which we do not need to match exactly.
-        "totalNumberFiles",
-        "maxFileSize",
-        "minFileSize"
-      )
-
-      table.copy(
-        createTime = 0L,
-        lastAccessTime = 0L,
-        properties = table.properties.filterKeys(!nondeterministicProps.contains(_)).toMap,
-        stats = None,
-        ignoredProperties = Map.empty
-      )
-    }
-    assert(normalize(actual) == normalize(expected))
+    assert(CatalogTable.normalize(actual) == CatalogTable.normalize(expected))
   }
 }
diff --git a/...test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala b/...test/scala/org/apache/spark/sql/hive/execution/command/AlterTableDropPartitionSuite.scala
@@ -17,7 +17,9 @@
 
 package org.apache.spark.sql.hive.execution.command
 
+import org.apache.spark.sql.Row
 import org.apache.spark.sql.execution.command.v1
+import org.apache.spark.sql.internal.SQLConf
 
 /**
  * The class contains tests for the `ALTER TABLE .. DROP PARTITION` command to check
@@ -42,4 +44,34 @@ class AlterTableDropPartitionSuite
       }
     }
   }
+
+  test("SPARK-34060: update stats of cached table") {
+    withSQLConf(SQLConf.AUTO_SIZE_UPDATE_ENABLED.key -> "true") {
+      withNamespaceAndTable("ns", "tbl") { t =>
+        def checkTableSize(expected: String): Unit = {
+          val stats =
+            sql(s"DESCRIBE TABLE EXTENDED $t")
+              .select("data_type")
+              .where("col_name = 'Statistics'")
+              .first()
+              .getString(0)
+          assert(stats.contains(expected))
+        }
+
+        sql(s"CREATE TABLE $t (id int, part int) $defaultUsing PARTITIONED BY (part)")
+        sql(s"INSERT INTO $t PARTITION (part=0) SELECT 0")
+        sql(s"INSERT INTO $t PARTITION (part=1) SELECT 1")
+        assert(!spark.catalog.isCached(t))
+        sql(s"CACHE TABLE $t")
+        assert(spark.catalog.isCached(t))
+        checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(0, 0), Row(1, 1)))
+        checkTableSize("4 bytes")
+
+        sql(s"ALTER TABLE $t DROP PARTITION (part=0)")
+        assert(spark.catalog.isCached(t))
+        checkTableSize("2 bytes")
+        checkAnswer(sql(s"SELECT * FROM $t"), Seq(Row(1, 1)))
+      }
+    }
+  }
 }