Skip to content

Commit

Permalink
[SPARK-33803][SQL] Sort table properties by key in DESCRIBE TABLE com…
Browse files Browse the repository at this point in the history
…mand

### What changes were proposed in this pull request?

This PR proposes to sort table properties in DESCRIBE TABLE command. This is consistent with DSv2 command as well:
https://github.com/apache/spark/blob/e3058ba17cb4512537953eb4ded884e24ee93ba2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala#L63

This PR fixes the test case in Scala 2.13 build as well where the table properties have different order in the map.

### Why are the changes needed?

To keep the deterministic and pretty output, and fix the tests in Scala 2.13 build.
See https://amplab.cs.berkeley.edu/jenkins/job/spark-master-test-maven-hadoop-3.2-scala-2.13/49/testReport/junit/org.apache.spark.sql/SQLQueryTestSuite/describe_sql/

```
describe.sql
Expected "...spark_catalog, view.[query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]]", but got "...spark_catalog, view.[catalogAndNamespace.part.1=default, view.query.out.col.2=c, view.referredTempFunctionsNames=[]]]" Result did not match for query #29
DESC FORMATTED v
```

### Does this PR introduce _any_ user-facing change?

Yes, it will change the text output from `DESCRIBE [EXTENDED|FORMATTED] table_name`.
Now the table properties are sorted by its key.

### How was this patch tested?

Related unittests were fixed accordingly.

Closes #30799 from HyukjinKwon/SPARK-33803.

Authored-by: HyukjinKwon <gurwls223@apache.org>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
(cherry picked from commit 7845865)
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
HyukjinKwon authored and cloud-fan committed Dec 16, 2020
1 parent f3709a0 commit 22ef681
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,8 @@ case class CatalogTable(

def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
val map = new mutable.LinkedHashMap[String, String]()
val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")
val tableProperties = properties.toSeq.sortBy(_._1)
.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")
val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")
val lastAccess = {
if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ Num Buckets 2
Bucket Columns [`a`]
Sort Columns [`b`]
Comment table_comment
Table Properties [t=test, e=3]
Table Properties [e=3, t=test]
Location [not included in comparison]/{warehouse_dir}/t
Storage Properties [a=1, b=2]
Partition Provider Catalog
Expand Down Expand Up @@ -162,7 +162,7 @@ Num Buckets 2
Bucket Columns [`a`]
Sort Columns [`b`]
Comment table_comment
Table Properties [t=test, e=3]
Table Properties [e=3, t=test]
Location [not included in comparison]/{warehouse_dir}/t
Storage Properties [a=1, b=2]
Partition Provider Catalog
Expand Down Expand Up @@ -477,7 +477,7 @@ View Text SELECT * FROM t
View Original Text SELECT * FROM t
View Catalog and Namespace spark_catalog.default
View Query Output Columns [a, b, c, d]
Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]]


-- !query
Expand All @@ -501,7 +501,7 @@ View Text SELECT * FROM t
View Original Text SELECT * FROM t
View Catalog and Namespace spark_catalog.default
View Query Output Columns [a, b, c, d]
Table Properties [view.query.out.col.3=d, view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=4, view.referredTempViewNames=[], view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=c, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=default]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=default, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.col.2=c, view.query.out.col.3=d, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[]]


-- !query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ View Text SELECT * FROM base_table
View Original Text SELECT * FROM base_table
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -313,7 +313,7 @@ View Text SELECT * FROM base_table
View Original Text SELECT * FROM base_table
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -359,7 +359,7 @@ View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a
WHERE t1.id = t2.id
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [t1_a, t2_a]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=t1_a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=t2_a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=t1_a, view.query.out.col.1=t2_a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -413,7 +413,7 @@ View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_t
View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -443,7 +443,7 @@ View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_
View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [id, a]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=id, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=a, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=id, view.query.out.col.1=a, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -473,7 +473,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_t
View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -503,7 +503,7 @@ View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM ba
View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -533,7 +533,7 @@ View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1)
View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1)
View Catalog and Namespace spark_catalog.temp_view_test
View Query Output Columns [a, id]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=id, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=temp_view_test]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=temp_view_test, view.query.out.col.0=a, view.query.out.col.1=id, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -669,7 +669,7 @@ View Text SELECT * FROM t1 CROSS JOIN t2
View Original Text SELECT * FROM t1 CROSS JOIN t2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -710,7 +710,7 @@ View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2
View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -751,7 +751,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2
View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -792,7 +792,7 @@ View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.va
View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx'
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [num, name, num2, value]
Table Properties [view.query.out.col.3=value, view.catalogAndNamespace.numParts=2, view.query.out.col.0=num, view.query.out.numCols=4, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=name, view.catalogAndNamespace.part.0=spark_catalog, view.query.out.col.2=num2, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=num, view.query.out.col.1=name, view.query.out.col.2=num2, view.query.out.col.3=value, view.query.out.numCols=4, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -894,7 +894,7 @@ BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2)
AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [a, b]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down Expand Up @@ -933,7 +933,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f)
AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j)
View Catalog and Namespace spark_catalog.testviewschm2
View Query Output Columns [a, b]
Table Properties [view.catalogAndNamespace.numParts=2, view.query.out.col.0=a, view.query.out.numCols=2, view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true, view.query.out.col.1=b, view.catalogAndNamespace.part.0=spark_catalog, view.referredTempFunctionsNames=[], view.catalogAndNamespace.part.1=testviewschm2]
Table Properties [view.catalogAndNamespace.numParts=2, view.catalogAndNamespace.part.0=spark_catalog, view.catalogAndNamespace.part.1=testviewschm2, view.query.out.col.0=a, view.query.out.col.1=b, view.query.out.numCols=2, view.referredTempFunctionsNames=[], view.referredTempViewNames=[], view.sqlConfig.spark.sql.ansi.enabled=true]


-- !query
Expand Down

0 comments on commit 22ef681

Please sign in to comment.