Fix CTAS test failures in TestCreateTable for Spark3.2
dongkelun committed May 16, 2022 · commit 4e95722 · 1 parent: 61030d8
Showing 2 changed files with 75 additions and 68 deletions.
TestCreateTable.scala:

@@ -383,80 +383,86 @@ class TestCreateTable extends HoodieSparkSqlTestBase {
   }

   test("Test Create Table As Select With Tblproperties For Filter Props") {
-    Seq("cow", "mor").foreach { tableType =>
-      val tableName = generateTableName
-      spark.sql(
-        s"""
-           | create table $tableName using hudi
-           | partitioned by (dt)
-           | tblproperties(
-           |  hoodie.database.name = "databaseName",
-           |  hoodie.table.name = "tableName",
-           |  primaryKey = 'id',
-           |  preCombineField = 'ts',
-           |  hoodie.datasource.write.operation = 'upsert',
-           |  type = '$tableType'
-           | )
-           | AS
-           | select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt, 1000 as ts
-       """.stripMargin
-      )
-      checkAnswer(s"select id, name, price, dt from $tableName")(
-        Seq(1, "a1", 10, "2021-04-01")
-      )
-      val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
-      assertFalse(table.properties.contains(HoodieTableConfig.DATABASE_NAME.key()))
-      assertFalse(table.properties.contains(HoodieTableConfig.NAME.key()))
-      assertFalse(table.properties.contains(OPERATION.key()))
-
-      val tablePath = table.storage.properties("path")
-      val metaClient = HoodieTableMetaClient.builder()
-        .setBasePath(tablePath)
-        .setConf(spark.sessionState.newHadoopConf())
-        .build()
-      val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap
-      assertResult("default")(tableConfig(HoodieTableConfig.DATABASE_NAME.key()))
-      assertResult(tableName)(tableConfig(HoodieTableConfig.NAME.key()))
-      assertFalse(tableConfig.contains(OPERATION.key()))
-    }
+    withTempDir { tmp =>
+      Seq("cow", "mor").foreach { tableType =>
+        val tableName = generateTableName
+        spark.sql(
+          s"""
+             | create table $tableName using hudi
+             | partitioned by (dt)
+             | tblproperties(
+             |  hoodie.database.name = "databaseName",
+             |  hoodie.table.name = "tableName",
+             |  primaryKey = 'id',
+             |  preCombineField = 'ts',
+             |  hoodie.datasource.write.operation = 'upsert',
+             |  type = '$tableType'
+             | )
+             | location '${tmp.getCanonicalPath}/$tableName'
+             | AS
+             | select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt, 1000 as ts
+         """.stripMargin
+        )
+        checkAnswer(s"select id, name, price, dt from $tableName")(
+          Seq(1, "a1", 10, "2021-04-01")
+        )
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
+        assertFalse(table.properties.contains(HoodieTableConfig.DATABASE_NAME.key()))
+        assertFalse(table.properties.contains(HoodieTableConfig.NAME.key()))
+        assertFalse(table.properties.contains(OPERATION.key()))
+
+        val tablePath = table.storage.properties("path")
+        val metaClient = HoodieTableMetaClient.builder()
+          .setBasePath(tablePath)
+          .setConf(spark.sessionState.newHadoopConf())
+          .build()
+        val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap
+        assertResult("default")(tableConfig(HoodieTableConfig.DATABASE_NAME.key()))
+        assertResult(tableName)(tableConfig(HoodieTableConfig.NAME.key()))
+        assertFalse(tableConfig.contains(OPERATION.key()))
+      }
+    }
   }

   test("Test Create Table As Select With Options For Filter Props") {
-    Seq("cow", "mor").foreach { tableType =>
-      val tableName = generateTableName
-      spark.sql(
-        s"""
-           | create table $tableName using hudi
-           | partitioned by (dt)
-           | options(
-           |  hoodie.database.name = "databaseName",
-           |  hoodie.table.name = "tableName",
-           |  primaryKey = 'id',
-           |  preCombineField = 'ts',
-           |  hoodie.datasource.write.operation = 'upsert',
-           |  type = '$tableType'
-           | )
-           | AS
-           | select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt, 1000 as ts
-       """.stripMargin
-      )
-      checkAnswer(s"select id, name, price, dt from $tableName")(
-        Seq(1, "a1", 10, "2021-04-01")
-      )
-      val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
-      assertFalse(table.properties.contains(HoodieTableConfig.DATABASE_NAME.key()))
-      assertFalse(table.properties.contains(HoodieTableConfig.NAME.key()))
-      assertFalse(table.properties.contains(OPERATION.key()))
-
-      val tablePath = table.storage.properties("path")
-      val metaClient = HoodieTableMetaClient.builder()
-        .setBasePath(tablePath)
-        .setConf(spark.sessionState.newHadoopConf())
-        .build()
-      val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap
-      assertResult("default")(tableConfig(HoodieTableConfig.DATABASE_NAME.key()))
-      assertResult(tableName)(tableConfig(HoodieTableConfig.NAME.key()))
-      assertFalse(tableConfig.contains(OPERATION.key()))
-    }
+    withTempDir { tmp =>
+      Seq("cow", "mor").foreach { tableType =>
+        val tableName = generateTableName
+        spark.sql(
+          s"""
+             | create table $tableName using hudi
+             | partitioned by (dt)
+             | options(
+             |  hoodie.database.name = "databaseName",
+             |  hoodie.table.name = "tableName",
+             |  primaryKey = 'id',
+             |  preCombineField = 'ts',
+             |  hoodie.datasource.write.operation = 'upsert',
+             |  type = '$tableType'
+             | )
+             | location '${tmp.getCanonicalPath}/$tableName'
+             | AS
+             | select 1 as id, 'a1' as name, 10 as price, '2021-04-01' as dt, 1000 as ts
+         """.stripMargin
+        )
+        checkAnswer(s"select id, name, price, dt from $tableName")(
+          Seq(1, "a1", 10, "2021-04-01")
+        )
+        val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tableName))
+        assertFalse(table.properties.contains(HoodieTableConfig.DATABASE_NAME.key()))
+        assertFalse(table.properties.contains(HoodieTableConfig.NAME.key()))
+        assertFalse(table.properties.contains(OPERATION.key()))
+
+        val tablePath = table.storage.properties("path")
+        val metaClient = HoodieTableMetaClient.builder()
+          .setBasePath(tablePath)
+          .setConf(spark.sessionState.newHadoopConf())
+          .build()
+        val tableConfig = metaClient.getTableConfig.getProps.asScala.toMap
+        assertResult("default")(tableConfig(HoodieTableConfig.DATABASE_NAME.key()))
+        assertResult(tableName)(tableConfig(HoodieTableConfig.NAME.key()))
+        assertFalse(tableConfig.contains(OPERATION.key()))
+      }
+    }
   }
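The test-side change is mechanical: each CTAS statement is wrapped in withTempDir and gains an explicit location clause, so the table is created as an external table under a scratch directory the test owns, rather than under the session catalog's default warehouse path that Spark 3.2 resolves through the V2 HoodieCatalog shown in the second file. Below is a minimal sketch of what a withTempDir-style helper typically does; the real helper is defined in HoodieSparkSqlTestBase, and this standalone version is only an assumption for illustration:

import java.io.File
import java.nio.file.Files

object TempDirSketch {
  // Assumed shape of the withTempDir helper (the real one lives in
  // HoodieSparkSqlTestBase): create a scratch directory, hand it to the
  // test body, then delete it recursively regardless of the outcome.
  def withTempDir(body: File => Unit): Unit = {
    val dir = Files.createTempDirectory("hoodie_sql_test").toFile
    try body(dir) finally deleteRecursively(dir)
  }

  // Delete children first, then the directory itself.
  private def deleteRecursively(f: File): Unit = {
    Option(f.listFiles()).foreach(_.foreach(deleteRecursively))
    f.delete()
  }
}

In the tests above, each tableType iteration then writes to its own path under ${tmp.getCanonicalPath}/$tableName, and nothing leaks into the warehouse directory between runs.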

HoodieCatalog.scala:

@@ -26,6 +26,7 @@ import org.apache.hudi.{DataSourceWriteOptions, SparkAdapterSupport}
 import org.apache.spark.sql.HoodieSpark3SqlUtils.convertTransforms
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException, UnresolvedAttribute}
+import org.apache.spark.sql.catalyst.catalog.HoodieCatalogTable.needFilterProps
 import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType, CatalogUtils, HoodieCatalogTable}
 import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper
 import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, ColumnChange, UpdateColumnComment, UpdateColumnType}
@@ -215,7 +216,7 @@ class HoodieCatalog extends DelegatingCatalogExtension
     val loc = locUriOpt
       .orElse(existingTableOpt.flatMap(_.storage.locationUri))
       .getOrElse(spark.sessionState.catalog.defaultTablePath(id))
-    val storage = DataSource.buildStorageFormatFromOptions(writeOptions)
+    val storage = DataSource.buildStorageFormatFromOptions(writeOptions.--(needFilterProps))
       .copy(locationUri = Option(loc))
     val tableType =
       if (location.isDefined) CatalogTableType.EXTERNAL else CatalogTableType.MANAGED
@@ -233,7 +234,7 @@ class HoodieCatalog extends DelegatingCatalogExtension
       provider = Option("hudi"),
       partitionColumnNames = newPartitionColumns,
       bucketSpec = newBucketSpec,
-      properties = tablePropertiesNew.asScala.toMap,
+      properties = tablePropertiesNew.asScala.toMap.--(needFilterProps),
       comment = commentOpt)

     val hoodieCatalogTable = HoodieCatalogTable(spark, tableDesc)
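Both catalog-side changes rely on Scala's Map -- operator, which returns a copy of the map with the given keys removed, so Hudi-internal settings supplied by the user never reach the Spark catalog entry. A small self-contained illustration follows; the exact contents of needFilterProps are an assumption here, inferred from the three keys the tests assert on:

object FilterPropsSketch extends App {
  // Assumed contents; the real needFilterProps is imported from
  // HoodieCatalogTable's companion object.
  val needFilterProps: Seq[String] = Seq(
    "hoodie.database.name",
    "hoodie.table.name",
    "hoodie.datasource.write.operation"
  )

  // User-supplied CTAS options, mirroring the tblproperties in the test.
  val writeOptions: Map[String, String] = Map(
    "hoodie.database.name" -> "databaseName",
    "hoodie.table.name" -> "tableName",
    "primaryKey" -> "id",
    "preCombineField" -> "ts",
    "hoodie.datasource.write.operation" -> "upsert",
    "type" -> "cow"
  )

  // Map.-- drops every key in the given collection, just like
  // writeOptions.--(needFilterProps) above.
  val filtered: Map[String, String] = writeOptions -- needFilterProps

  println(filtered)
  // Map(primaryKey -> id, preCombineField -> ts, type -> cow)
}

This is exactly what the tests check: the filtered keys disappear from table.properties in the Spark catalog, while hoodie.properties on disk, read back through HoodieTableMetaClient, still records the canonical database and table names.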
