From 89bb32b7a05f26eb49ec3cd2143f95dc6aeb43cf Mon Sep 17 00:00:00 2001
From: Sagar Sumit
Date: Thu, 15 Sep 2022 16:46:10 +0530
Subject: [PATCH] [HUDI-4071] Remove default value for mandatory record key field

---
 .../apache/hudi/config/HoodieIndexConfig.java      |  4 ++--
 .../hudi/index/TestHoodieIndexConfigs.java         |  8 ++++++--
 .../bucket/TestHoodieSimpleBucketIndex.java        |  1 +
 .../commit/TestCopyOnWriteActionExecutor.java      |  3 +--
 ...estHoodieSparkMergeOnReadTableCompaction.java   | 10 ++++------
 .../hudi/common/config/ConfigProperty.java         |  4 ++--
 .../keygen/constant/KeyGeneratorOptions.java       |  2 +-
 .../quickstart/HoodieSparkQuickstart.java          |  3 +--
 .../quickstart/TestHoodieSparkQuickstart.java      | 11 +----------
 .../org/apache/hudi/DataSourceOptions.scala        |  5 +----
 .../org/apache/hudi/HoodieWriterUtils.scala        |  1 -
 .../spark/sql/hudi/HoodieOptionConfig.scala        |  1 -
 .../org/apache/hudi/TestDataSourceOptions.scala    |  2 ++
 .../apache/hudi/TestHoodieSparkSqlWriter.scala     | 16 ++++++++++++----
 ...la => TestMORDataSourceWithBucketIndex.scala}   | 16 +++++++---------
 15 files changed, 41 insertions(+), 46 deletions(-)
 rename hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/{TestMORDataSourceWithBucket.scala => TestMORDataSourceWithBucketIndex.scala} (99%)

diff --git a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
index 57e201bb21eed..17e05ebd6325e 100644
--- a/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
+++ b/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java
@@ -676,10 +676,10 @@ private void validateBucketIndexConfig() {
     // check the bucket index hash field
     if (StringUtils.isNullOrEmpty(hoodieIndexConfig.getString(BUCKET_INDEX_HASH_FIELD))) {
       hoodieIndexConfig.setValue(BUCKET_INDEX_HASH_FIELD,
-          hoodieIndexConfig.getStringOrDefault(KeyGeneratorOptions.RECORDKEY_FIELD_NAME));
+          hoodieIndexConfig.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME));
     } else {
       boolean valid = Arrays
-          .stream(hoodieIndexConfig.getStringOrDefault(KeyGeneratorOptions.RECORDKEY_FIELD_NAME).split(","))
+          .stream(hoodieIndexConfig.getString(KeyGeneratorOptions.RECORDKEY_FIELD_NAME).split(","))
           .collect(Collectors.toSet())
           .containsAll(Arrays.asList(hoodieIndexConfig.getString(BUCKET_INDEX_HASH_FIELD).split(",")));
       if (!valid) {
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java
index 4f32f0ec10617..632fabebb7f97 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/TestHoodieIndexConfigs.java
@@ -31,6 +31,7 @@
 import org.apache.hudi.index.hbase.SparkHoodieHBaseIndex;
 import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex;
 import org.apache.hudi.index.simple.HoodieSimpleIndex;
+import org.apache.hudi.keygen.constant.KeyGeneratorOptions;
 
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -39,6 +40,7 @@
 import org.junit.jupiter.params.provider.EnumSource;
 
 import java.nio.file.Path;
+import java.util.Properties;
 
 import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -88,13 +90,15 @@ public void testCreateIndex(IndexType indexType) {
         assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof SparkHoodieHBaseIndex);
         break;
       case BUCKET:
+        Properties props = new Properties();
+        props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid");
        config = clientConfigBuilder.withPath(basePath)
-            .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.BUCKET)
+            .withIndexConfig(indexConfigBuilder.fromProperties(props).withIndexType(IndexType.BUCKET)
                 .withBucketIndexEngineType(HoodieIndex.BucketIndexEngineType.SIMPLE).build()).build();
         assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieSimpleBucketIndex);
 
         config = HoodieWriteConfig.newBuilder().withPath(basePath)
-            .withIndexConfig(indexConfigBuilder.withIndexType(IndexType.BUCKET)
+            .withIndexConfig(indexConfigBuilder.fromProperties(props).withIndexType(IndexType.BUCKET)
                 .withBucketIndexEngineType(HoodieIndex.BucketIndexEngineType.CONSISTENT_HASHING).build())
             .build();
         assertTrue(SparkHoodieIndexFactory.createIndex(config) instanceof HoodieSparkConsistentBucketIndex);
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieSimpleBucketIndex.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieSimpleBucketIndex.java
index a96ce04077088..00a7b3688e952 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieSimpleBucketIndex.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/index/bucket/TestHoodieSimpleBucketIndex.java
@@ -82,6 +82,7 @@ public void testBucketIndexValidityCheck() {
           .withBucketIndexEngineType(HoodieIndex.BucketIndexEngineType.SIMPLE)
           .withBucketNum("8").build();
     });
+    props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid");
     props.setProperty(HoodieIndexConfig.BUCKET_INDEX_HASH_FIELD.key(), "uuid");
     HoodieIndexConfig.newBuilder().fromProperties(props)
         .withIndexType(HoodieIndex.IndexType.BUCKET)
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
index f165b48db05d1..907520bfe219d 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/action/commit/TestCopyOnWriteActionExecutor.java
@@ -145,18 +145,17 @@ private Properties makeIndexConfig(HoodieIndex.IndexType indexType) {
     Properties props = new Properties();
     HoodieIndexConfig.Builder indexConfig = HoodieIndexConfig.newBuilder()
         .withIndexType(indexType);
-    props.putAll(indexConfig.build().getProps());
     if (indexType.equals(HoodieIndex.IndexType.BUCKET)) {
       props.setProperty(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "_row_key");
       indexConfig.fromProperties(props)
           .withIndexKeyField("_row_key")
           .withBucketNum("1")
           .withBucketIndexEngineType(HoodieIndex.BucketIndexEngineType.SIMPLE);
-      props.putAll(indexConfig.build().getProps());
       props.putAll(HoodieLayoutConfig.newBuilder().fromProperties(props)
           .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
           .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build().getProps());
     }
+    props.putAll(indexConfig.build().getProps());
     return props;
   }
 
diff --git a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
index f959a8f0d9526..8f326b5f9f623 100644
--- a/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
+++ b/hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/table/functional/TestHoodieSparkMergeOnReadTableCompaction.java
@@ -86,6 +86,7 @@ public void setup() {
 
   @Test
   public void testWriteDuringCompaction() throws IOException {
+    Properties props = getPropertiesForKeyGen(true);
     HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
         .forTable("test-trip-table")
         .withPath(basePath())
@@ -99,10 +100,8 @@ public void testWriteDuringCompaction() throws IOException {
         .withLayoutConfig(HoodieLayoutConfig.newBuilder()
             .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
             .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
-        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
+        .withIndexConfig(HoodieIndexConfig.newBuilder().fromProperties(props).withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
         .build();
-
-    Properties props = getPropertiesForKeyGen(true);
     props.putAll(config.getProps());
 
     metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props);
@@ -131,6 +130,7 @@ public void testWriteDuringCompaction() throws IOException {
   @ParameterizedTest
   @MethodSource("writeLogTest")
   public void testWriteLogDuringCompaction(boolean enableMetadataTable, boolean enableTimelineServer) throws IOException {
+    Properties props = getPropertiesForKeyGen(true);
     HoodieWriteConfig config = HoodieWriteConfig.newBuilder()
         .forTable("test-trip-table")
         .withPath(basePath())
@@ -144,10 +144,8 @@ public void testWriteLogDuringCompaction(boolean enableMetadataTable, boolean en
         .withLayoutConfig(HoodieLayoutConfig.newBuilder()
             .withLayoutType(HoodieStorageLayout.LayoutType.BUCKET.name())
             .withLayoutPartitioner(SparkBucketIndexPartitioner.class.getName()).build())
-        .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
+        .withIndexConfig(HoodieIndexConfig.newBuilder().fromProperties(props).withIndexType(HoodieIndex.IndexType.BUCKET).withBucketNum("1").build())
         .build();
-
-    Properties props = getPropertiesForKeyGen(true);
     props.putAll(config.getProps());
 
     metaClient = getHoodieMetaClient(HoodieTableType.MERGE_ON_READ, props);
diff --git a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java
index 934803d8d315e..08f36512c9150 100644
--- a/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java
+++ b/hudi-common/src/main/java/org/apache/hudi/common/config/ConfigProperty.java
@@ -27,9 +27,9 @@
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;
 import java.util.function.Function;
-import java.util.Objects;
 
 /**
  * ConfigProperty describes a configuration property. It contains the configuration
@@ -76,7 +76,7 @@ public String key() {
 
   public T defaultValue() {
     if (defaultValue == null) {
-      throw new HoodieException("There's no default value for this config");
+      throw new HoodieException(String.format("There's no default value for this config: %s", key));
     }
     return defaultValue;
   }
diff --git a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java
index ff182c4c1661f..a2be94a453e10 100644
--- a/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java
+++ b/hudi-common/src/main/java/org/apache/hudi/keygen/constant/KeyGeneratorOptions.java
@@ -45,7 +45,7 @@ public class KeyGeneratorOptions extends HoodieConfig {
 
   public static final ConfigProperty<String> RECORDKEY_FIELD_NAME = ConfigProperty
       .key("hoodie.datasource.write.recordkey.field")
-      .defaultValue("uuid")
+      .noDefaultValue()
       .withDocumentation("Record key field. Value to be used as the `recordKey` component of `HoodieKey`.\n"
          + "Actual value will be obtained by invoking .toString() on the field value. Nested fields can be specified using\n"
          + "the dot notation eg: `a.b.c`");
diff --git a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
index 5a6db78f882e3..7c566e91877fd 100644
--- a/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
+++ b/hudi-examples/hudi-examples-spark/src/main/java/org/apache/hudi/examples/quickstart/HoodieSparkQuickstart.java
@@ -123,7 +123,6 @@ public static void insertOverwriteData(SparkSession spark, JavaSparkContext jsc,
         .save(tablePath);
   }
 
-
   /**
    * Load the data files into a DataFrame.
    */
@@ -185,7 +184,7 @@ public static void delete(SparkSession spark, String tablePath, String tableName
     df.write().format("org.apache.hudi")
         .options(QuickstartUtils.getQuickstartWriteConfigs())
         .option(HoodieWriteConfig.PRECOMBINE_FIELD_NAME.key(), "ts")
-        .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "uuid")
+        .option(KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key(), "uuid")
         .option(KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key(), "partitionpath")
         .option(TBL_NAME.key(), tableName)
         .option("hoodie.datasource.write.operation", WriteOperationType.DELETE.value())
diff --git a/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java b/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java
index b9ab120460581..8297c788d8fd6 100644
--- a/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java
+++ b/hudi-examples/hudi-examples-spark/src/test/java/org/apache/hudi/examples/quickstart/TestHoodieSparkQuickstart.java
@@ -21,9 +21,8 @@
 import org.apache.hudi.client.HoodieReadClient;
 import org.apache.hudi.client.SparkRDDWriteClient;
 import org.apache.hudi.client.common.HoodieSparkEngineContext;
-import org.apache.hudi.common.model.HoodieAvroPayload;
-import org.apache.hudi.examples.common.HoodieExampleDataGenerator;
 import org.apache.hudi.testutils.providers.SparkProvider;
+
 import org.apache.spark.SparkConf;
 import org.apache.spark.api.java.JavaSparkContext;
 import org.apache.spark.sql.SQLContext;
@@ -36,15 +35,7 @@
 import java.io.File;
 import java.nio.file.Paths;
 
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.delete;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.deleteByPartition;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.incrementalQuery;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.insertData;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.insertOverwriteData;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.pointInTimeQuery;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.queryData;
 import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.runQuickstart;
-import static org.apache.hudi.examples.quickstart.HoodieSparkQuickstart.updateData;
 
 public class TestHoodieSparkQuickstart implements SparkProvider {
   protected static HoodieSparkEngineContext context;
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
index c694174b8c79a..e452be1b37cc8 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DataSourceOptions.scala
@@ -605,9 +605,6 @@ object DataSourceWriteOptions {
   val RECORDKEY_FIELD_OPT_KEY = KeyGeneratorOptions.RECORDKEY_FIELD_NAME.key()
   /** @deprecated Use {@link RECORDKEY_FIELD} and its methods instead */
   @Deprecated
-  val DEFAULT_RECORDKEY_FIELD_OPT_VAL = RECORDKEY_FIELD.defaultValue()
-  /** @deprecated Use {@link PARTITIONPATH_FIELD} and its methods instead */
-  @Deprecated
   val PARTITIONPATH_FIELD_OPT_KEY = KeyGeneratorOptions.PARTITIONPATH_FIELD_NAME.key()
   /** @deprecated Use {@link PARTITIONPATH_FIELD} and its methods instead */
   @Deprecated
@@ -789,7 +786,7 @@ object DataSourceOptionsHelper {
     val partitionFields = props.getString(DataSourceWriteOptions.PARTITIONPATH_FIELD.key(), null)
     if (partitionFields != null) {
       val numPartFields = partitionFields.split(",").length
-      val recordsKeyFields = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD.key(), DataSourceWriteOptions.RECORDKEY_FIELD.defaultValue())
+      val recordsKeyFields = props.getString(DataSourceWriteOptions.RECORDKEY_FIELD.key())
       val numRecordKeyFields = recordsKeyFields.split(",").length
       if (numPartFields == 1 && numRecordKeyFields == 1) {
         classOf[SimpleKeyGenerator].getName
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
index aa29bad6b03d0..ca00d09ea9617 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieWriterUtils.scala
@@ -58,7 +58,6 @@ object HoodieWriterUtils {
     hoodieConfig.setDefaultValue(TABLE_TYPE)
     hoodieConfig.setDefaultValue(PRECOMBINE_FIELD)
     hoodieConfig.setDefaultValue(PAYLOAD_CLASS_NAME)
-    hoodieConfig.setDefaultValue(RECORDKEY_FIELD)
     hoodieConfig.setDefaultValue(KEYGENERATOR_CLASS_NAME)
     hoodieConfig.setDefaultValue(ENABLE)
     hoodieConfig.setDefaultValue(COMMIT_METADATA_KEYPREFIX)
diff --git a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
index 732367cf5a5e5..7efb60ae0b6a5 100644
--- a/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
+++ b/hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieOptionConfig.scala
@@ -46,7 +46,6 @@ object HoodieOptionConfig {
     .withSqlKey("primaryKey")
     .withHoodieKey(DataSourceWriteOptions.RECORDKEY_FIELD.key)
     .withTableConfigKey(HoodieTableConfig.RECORDKEY_FIELDS.key)
-    .defaultValue(DataSourceWriteOptions.RECORDKEY_FIELD.defaultValue())
     .build()
 
   val SQL_KEY_TABLE_TYPE: HoodieSQLOption[String] = buildConf()
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceOptions.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceOptions.scala
index 9920aa80baf09..302ddf38682ce 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceOptions.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestDataSourceOptions.scala
@@ -27,6 +27,7 @@ class TestDataSourceOptions {
 
   @Test def inferDataSourceOptions(): Unit = {
     val inputOptions1 = Map(
+      RECORDKEY_FIELD.key -> "uuid",
       TABLE_NAME.key -> "hudi_table",
       PARTITIONPATH_FIELD.key -> "year,month"
     )
@@ -38,6 +39,7 @@ class TestDataSourceOptions {
       modifiedOptions1(HoodieSyncConfig.META_SYNC_PARTITION_EXTRACTOR_CLASS.key))
 
     val inputOptions2 = Map(
+      RECORDKEY_FIELD.key -> "uuid",
       TABLE_NAME.key -> "hudi_table",
       PARTITIONPATH_FIELD.key -> "year",
       HIVE_STYLE_PARTITIONING.key -> "true"
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
index 93469f2796cf9..bd1fbb104fa05 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/TestHoodieSparkSqlWriter.scala
@@ -261,14 +261,22 @@ class TestHoodieSparkSqlWriter {
 
   @Test
   def testThrowExceptionAlreadyExistsWithAppendSaveMode(): Unit = {
     //create a new table
-    val fooTableModifier = Map("path" -> tempBasePath, HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName,
-      "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4")
+    val fooTableModifier = Map(
+      "path" -> tempBasePath,
+      HoodieWriteConfig.TBL_NAME.key -> hoodieFooTableName,
+      "hoodie.datasource.write.recordkey.field" -> "uuid",
+      "hoodie.insert.shuffle.parallelism" -> "4",
+      "hoodie.upsert.shuffle.parallelism" -> "4")
     val dataFrame = spark.createDataFrame(Seq(StringLongTest(UUID.randomUUID().toString, new Date().getTime)))
     HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, fooTableModifier, dataFrame)
     //on same path try append with different("hoodie_bar_tbl") table name which should throw an exception
-    val barTableModifier = Map("path" -> tempBasePath, HoodieWriteConfig.TBL_NAME.key -> "hoodie_bar_tbl",
-      "hoodie.insert.shuffle.parallelism" -> "4", "hoodie.upsert.shuffle.parallelism" -> "4")
+    val barTableModifier = Map(
+      "path" -> tempBasePath,
+      HoodieWriteConfig.TBL_NAME.key -> "hoodie_bar_tbl",
+      "hoodie.datasource.write.recordkey.field" -> "uuid",
+      "hoodie.insert.shuffle.parallelism" -> "4",
+      "hoodie.upsert.shuffle.parallelism" -> "4")
     val dataFrame2 = spark.createDataFrame(Seq(StringLongTest(UUID.randomUUID().toString, new Date().getTime)))
     val tableAlreadyExistException = intercept[HoodieException](HoodieSparkSqlWriter.write(sqlContext, SaveMode.Append, barTableModifier, dataFrame2))
     assert(tableAlreadyExistException.getMessage.contains("Config conflict"))
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucket.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucketIndex.scala
similarity index 99%
rename from hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucket.scala
rename to hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucketIndex.scala
index 57ebd038f2ae1..187de2d8e0671 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucket.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestMORDataSourceWithBucketIndex.scala
@@ -17,27 +17,25 @@
 
 package org.apache.hudi.functional
 
-import scala.collection.JavaConversions._
-
 import org.apache.hudi.common.testutils.HoodieTestDataGenerator
-import org.apache.hudi.config.{HoodieIndexConfig, HoodieLayoutConfig, HoodieWriteConfig}
-import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
 import org.apache.hudi.common.testutils.RawTripTestPayload.recordsToStrings
+import org.apache.hudi.config.{HoodieIndexConfig, HoodieLayoutConfig, HoodieWriteConfig}
 import org.apache.hudi.index.HoodieIndex.IndexType
 import org.apache.hudi.keygen.constant.KeyGeneratorOptions
+import org.apache.hudi.table.action.commit.SparkBucketIndexPartitioner
+import org.apache.hudi.table.storage.HoodieStorageLayout
 import org.apache.hudi.testutils.HoodieClientTestBase
-
+import org.apache.hudi.{DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers}
 import org.apache.spark.sql._
-import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue}
+import org.junit.jupiter.api.{AfterEach, BeforeEach, Test}
 
-import org.apache.hudi.table.action.commit.SparkBucketIndexPartitioner
-import org.apache.hudi.table.storage.HoodieStorageLayout
+import scala.collection.JavaConversions._
 
 /**
  *
 */
-class TestDataSourceForBucketIndex extends HoodieClientTestBase
+class TestMORDataSourceWithBucketIndex extends HoodieClientTestBase {
 
   var spark: SparkSession = null
   val commonOpts = Map(
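
Usage note (illustrative, not part of the diff): with the "uuid" default removed from hoodie.datasource.write.recordkey.field, the record key becomes a mandatory write config; a write that omits it should now fail rather than silently keying on a "uuid" column, and the bucket-index hash field falls back to whatever key the user supplies. The Spark/Scala sketch below shows the option being set explicitly, in the same spirit as the updated tests and quickstart example. The table name, base path, and column names are assumptions chosen for illustration, not values taken from this patch.

    import org.apache.spark.sql.{SaveMode, SparkSession}

    object RecordKeyExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .appName("hudi-recordkey-example")
          .master("local[2]")
          .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .getOrCreate()

        // Illustrative rows: "uuid" is the record key column, "partitionpath" the partition column.
        val df = spark.createDataFrame(Seq(
          ("id-1", "americas/brazil/sao_paulo", 1663229770L),
          ("id-2", "asia/india/chennai", 1663229771L)
        )).toDF("uuid", "partitionpath", "ts")

        df.write.format("hudi")
          .option("hoodie.table.name", "hudi_trips")
          // Mandatory after this change: there is no implicit "uuid" default any more.
          .option("hoodie.datasource.write.recordkey.field", "uuid")
          .option("hoodie.datasource.write.partitionpath.field", "partitionpath")
          .option("hoodie.datasource.write.precombine.field", "ts")
          .mode(SaveMode.Overwrite)
          .save("/tmp/hudi_trips")

        spark.stop()
      }
    }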