[SPARK-49866][SQL] Improve the error message for describe table with …

…partition columns ### What changes were proposed in this pull request? Provide a more user-facing error when a partition column name can't be found in the table schema. ### Why are the changes needed? There's an issue where a partition column sometimes doesn't match any column in the table schema. When that happens, we throw an assertion error, which is not user-friendly. Because of that, we introduced a new error in `QueryExecutionErrors` in order to make it more user-facing. ### Does this PR introduce _any_ user-facing change? Yes, users will get a more user-friendly error message. ### Was this patch authored or co-authored using generative AI tooling? No Closes apache#48338 from mihailoale-db/mihailoale-db/fixdescribepartitioningmessage. Authored-by: Mihailo Aleksic <mihailo.aleksic@databricks.com> Signed-off-by: Max Gekk <max.gekk@gmail.com>
panbingkun · Oct 5, 2024 · 37f2966 · 37f2966
1 parent 3e69b40
commit 37f2966
Show file tree

Hide file tree

Showing 3 changed files with 25 additions and 3 deletions.
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
@@ -3802,6 +3802,12 @@
     ],
     "sqlState" : "428FT"
   },
+  "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA" : {
+    "message" : [
+      "Partition column <column> not found in schema <schema>. Please provide the existing column for partitioning."
+    ],
+    "sqlState" : "42000"
+  },
   "PATH_ALREADY_EXISTS" : {
     "message" : [
       "Path <outputPath> already exists. Set mode as \"overwrite\" to overwrite the existing path."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2856,4 +2856,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
       )
     )
   }
+
+  def partitionColumnNotFoundInTheTableSchemaError( // builds the error; the caller throws it
+      column: Seq[String], // name parts of the partition column that was not found
+      schema: StructType): SparkRuntimeException = { // schema in which the column was looked up
+    new SparkRuntimeException(
+      errorClass = "PARTITION_COLUMN_NOT_FOUND_IN_SCHEMA",
+      messageParameters = Map(
+        "column" -> toSQLId(column), // fills <column> in the error message template
+        "schema" -> toSQLType(schema) // fills <schema> in the error message template
+      )
+    )
+  }
 }
diff --git a/...core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/...core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
 import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.{ClusterByTransform, IdentityTransform}
 import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.errors.QueryExecutionErrors
 import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
 
@@ -156,9 +157,12 @@ case class DescribeTableExec(
           .map(_.asInstanceOf[IdentityTransform].ref.fieldNames())
           .map { fieldNames =>
             val nestedField = table.schema.findNestedField(fieldNames.toImmutableArraySeq)
-            assert(nestedField.isDefined,
-              s"Not found the partition column ${fieldNames.map(quoteIfNeeded).mkString(".")} " +
-              s"in the table schema ${table.schema().catalogString}.")
+            if (nestedField.isEmpty) {
+              throw QueryExecutionErrors.partitionColumnNotFoundInTheTableSchemaError(
+                fieldNames.toSeq,
+                table.schema()
+              )
+            }
             nestedField.get
           }.map { case (path, field) =>
             toCatalystRow(