[HUDI-4925] Should Force to use ExpressionPayload in MergeIntoTableCommand #6355

Merged 7 commits on Sep 29, 2022
@@ -473,7 +473,8 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie
     val targetTableDb = targetTableIdentify.database.getOrElse("default")
     val targetTableName = targetTableIdentify.identifier
     val path = hoodieCatalogTable.tableLocation
-    val catalogProperties = hoodieCatalogTable.catalogProperties
+    // force to use ExpressionPayload as WRITE_PAYLOAD_CLASS_NAME in MergeIntoHoodieTableCommand
+    val catalogProperties = hoodieCatalogTable.catalogProperties + (PAYLOAD_CLASS_NAME.key -> classOf[ExpressionPayload].getCanonicalName)
     val tableConfig = hoodieCatalogTable.tableConfig
     val tableSchema = hoodieCatalogTable.tableSchema
     val partitionColumns = tableConfig.getPartitionFieldProp.split(",").map(_.toLowerCase)
@@ -487,14 +488,13 @@ case class MergeIntoHoodieTableCommand(mergeInto: MergeIntoTable) extends Hoodie
     val hoodieProps = getHoodieProps(catalogProperties, tableConfig, sparkSession.sqlContext.conf)
     val hiveSyncConfig = buildHiveSyncConfig(hoodieProps, hoodieCatalogTable)

-    withSparkConf(sparkSession, hoodieCatalogTable.catalogProperties) {
+    withSparkConf(sparkSession, catalogProperties) {
       Map(
         "path" -> path,
         RECORDKEY_FIELD.key -> tableConfig.getRecordKeyFieldProp,
         PRECOMBINE_FIELD.key -> preCombineField,
         TBL_NAME.key -> hoodieCatalogTable.tableName,
         PARTITIONPATH_FIELD.key -> tableConfig.getPartitionFieldProp,
-        PAYLOAD_CLASS_NAME.key -> classOf[ExpressionPayload].getCanonicalName,
         HIVE_STYLE_PARTITIONING.key -> tableConfig.getHiveStylePartitioningEnable,
         URL_ENCODE_PARTITIONING.key -> tableConfig.getUrlEncodePartitioning,
         KEYGENERATOR_CLASS_NAME.key -> classOf[SqlKeyGenerator].getCanonicalName,
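Note on the mechanism: the change works because Scala's immutable Map `+` operator replaces any existing binding for a key, so a payload class the user stored in the table's catalog properties is silently superseded by ExpressionPayload before the properties reach withSparkConf and the Hive sync config. A minimal standalone sketch of that behavior, not part of the PR; the config-key string is taken from the test below, and the ExpressionPayload package path is my assumption:

    // Standalone sketch: `+` on an immutable Map replaces an existing binding,
    // so a user-configured payload class in catalogProperties is superseded.
    object PayloadOverrideSketch extends App {
      val payloadKey = "hoodie.datasource.write.payload.class" // PAYLOAD_CLASS_NAME.key
      val catalogProperties: Map[String, String] = Map(
        payloadKey -> "org.apache.hudi.common.model.DefaultHoodieRecordPayload"
      )
      val overridden = catalogProperties +
        (payloadKey -> "org.apache.spark.sql.hudi.command.payload.ExpressionPayload")
      // Prints the ExpressionPayload class name: the old binding is gone.
      println(overridden(payloadKey))
    }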
@@ -674,7 +674,7 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase {
     }
   }

-  test ("Test Merge into with String cast to Double") {
+  test("Test Merge into with String cast to Double") {
     withTempDir { tmp =>
       val tableName = generateTableName
       // Create a cow partitioned table.
@@ -713,4 +713,42 @@ class TestMergeIntoTable2 extends HoodieSparkSqlTestBase {
       )
     }
   }
+
+  test("Test Merge into where manually set DefaultHoodieRecordPayload") {
+    withTempDir { tmp =>
+      val tableName = generateTableName
+      // Create a cow table with DefaultHoodieRecordPayload set explicitly, and check that it is overridden by ExpressionPayload.
+      // If it is not, this test cannot pass, because DefaultHoodieRecordPayload cannot promote Int to Long when inserting a ts with an Integer value.
+      spark.sql(
+        s"""
+           | create table $tableName (
+           |   id int,
+           |   name string,
+           |   ts long
+           | ) using hudi
+           | tblproperties (
+           |   type = 'cow',
+           |   primaryKey = 'id',
+           |   preCombineField = 'ts',
+           |   hoodie.datasource.write.payload.class = 'org.apache.hudi.common.model.DefaultHoodieRecordPayload'
+           | ) location '${tmp.getCanonicalPath}'
+         """.stripMargin)
+      // Insert data
+      spark.sql(s"insert into $tableName select 1, 'a1', 999")
+      spark.sql(
+        s"""
+           | merge into $tableName as t0
+           | using (
+           |   select 'a2' as name, 1 as id, 1000 as ts
+           | ) as s0
+           | on t0.id = s0.id
+           | when matched then update set t0.name = s0.name, t0.ts = s0.ts
+           | when not matched then insert *
+         """.stripMargin
+      )
+      checkAnswer(s"select id, name, ts from $tableName")(
+        Seq(1, "a2", 1000)
+      )
+    }
+  }
 }
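For context on the failure mode the test's comment describes: the table schema declares `ts long`, while the literal `1000` in the MERGE source arrives as an Integer, and Avro's generic-record validation rejects a boxed Integer for a LONG field rather than widening it. A standalone Scala sketch of that Avro behavior, not part of the PR, and my assumption about where the Int-to-Long mismatch surfaces for DefaultHoodieRecordPayload:

    // Standalone illustration: Avro does not widen Integer to Long during
    // generic-record validation, the kind of mismatch a `ts long` column
    // hits when fed an Integer value.
    import org.apache.avro.SchemaBuilder
    import org.apache.avro.generic.GenericData

    object IntVsLongSketch extends App {
      val schema = SchemaBuilder.record("rec").fields().requiredLong("ts").endRecord()
      val record = new GenericData.Record(schema)
      record.put("ts", 1000)   // boxed java.lang.Integer
      println(GenericData.get.validate(schema, record)) // false: Integer is not a LONG
      record.put("ts", 1000L)  // boxed java.lang.Long
      println(GenericData.get.validate(schema, record)) // true
    }

ExpressionPayload avoids this by evaluating the MERGE clauses' expressions against the writer schema, which is why forcing it in MergeIntoHoodieTableCommand lets the test above pass.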