Skip to content

Commit

Permalink
addressing comments
Browse files Browse the repository at this point in the history
  • Loading branch information
nsivabalan committed Apr 18, 2023
1 parent f8aa7b9 commit 9d90f97
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -375,21 +375,6 @@ private static List<String> getPartitionsAdded(HoodieCommitMetadata commitMetada
.collect(Collectors.toList());
}

/**
 * Returns the partitions deleted by the given commit.
 *
 * <p>NOTE(review): the original detection logic (commented out below) derived deleted
 * partitions from the replaced file-ids of a {@code DELETE_PARTITION} replace-commit.
 * It is currently disabled, so this method always reports that no partitions were
 * deleted. Keep the commented block until the replacement mechanism lands — TODO confirm.
 *
 * @param commitMetadata commit metadata to inspect (currently unused while detection is disabled)
 * @return always an empty list while partition-deletion detection is disabled
 */
private static List<String> getPartitionsDeleted(HoodieCommitMetadata commitMetadata) {
/*if (commitMetadata instanceof HoodieReplaceCommitMetadata
    && WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType())) {
Map<String, List<String>> partitionToReplaceFileIds =
((HoodieReplaceCommitMetadata) commitMetadata).getPartitionToReplaceFileIds();
return partitionToReplaceFileIds.keySet().stream()
// We need to make sure we properly handle case of non-partitioned tables
.map(HoodieTableMetadataUtil::getPartitionIdentifier)
.collect(Collectors.toList());
}*/

// Detection disabled: deliberately report no deleted partitions.
return Collections.emptyList();
}

/**
* Convert commit action metadata to bloom filter records.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
package org.apache.spark.sql.hudi

import org.apache.hadoop.fs.Path
import org.apache.hudi.avro.model.HoodieCleanMetadata
import org.apache.hudi.{HoodieSparkRecordMerger, HoodieSparkUtils}
import org.apache.hudi.common.fs.FSUtils
import org.apache.hudi.common.config.HoodieStorageConfig
import org.apache.hudi.common.model.HoodieAvroRecordMerger
import org.apache.hudi.common.model.HoodieRecord.HoodieRecordType
import org.apache.hudi.common.table.HoodieTableMetaClient
import org.apache.hudi.common.table.timeline.TimelineMetadataUtils
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.exception.ExceptionUtil.getRootCause
import org.apache.hudi.index.inmemory.HoodieInMemoryHashIndex
Expand Down Expand Up @@ -232,6 +234,17 @@ object HoodieSparkSqlTestBase {
metaClient.getActiveTimeline.getLastCommitMetadataWithValidData.get.getRight
}

/**
 * Reads and deserializes the metadata of the most recent completed clean action
 * on the Hudi table at `tablePath`.
 *
 * @param spark     active [[SparkSession]] providing the Hadoop configuration
 * @param tablePath base path of the Hudi table
 * @return the deserialized [[HoodieCleanMetadata]] of the last completed clean
 * @throws AssertionError if the table has no completed clean instant
 */
def getLastCleanMetadata(spark: SparkSession, tablePath: String): HoodieCleanMetadata = {
  val metaClient = HoodieTableMetaClient.builder()
    .setConf(spark.sparkContext.hadoopConfiguration)
    .setBasePath(tablePath)
    .build()

  // Fail with a descriptive message instead of a bare NoSuchElementException
  // when no clean has completed yet.
  val cleanInstant = metaClient.getActiveTimeline.getCleanerTimeline
    .filterCompletedInstants().lastInstant()
  assert(cleanInstant.isPresent, s"No completed clean instant found for table at $tablePath")

  TimelineMetadataUtils.deserializeHoodieCleanMetadata(
    metaClient.getActiveTimeline.getInstantDetails(cleanInstant.get()).get)
}

/** Returns true when the trimmed message of `e` contains the trimmed `text`. */
private def checkMessageContains(e: Throwable, text: String): Boolean = {
  val needle = text.trim
  val haystack = e.getMessage.trim
  haystack.contains(needle)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.spark.sql.hudi

import org.apache.hudi.DataSourceWriteOptions._
import org.apache.hudi.avro.model.HoodieCleanMetadata
import org.apache.hudi.{HoodieCLIUtils, HoodieSparkUtils}
import org.apache.hudi.common.model.HoodieCommitMetadata
import org.apache.hudi.common.table.HoodieTableMetaClient
Expand All @@ -27,6 +28,7 @@ import org.apache.hudi.common.util.{PartitionPathEncodeUtils, StringUtils}
import org.apache.hudi.config.HoodieWriteConfig
import org.apache.hudi.keygen.{ComplexKeyGenerator, SimpleKeyGenerator}
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.hudi.HoodieSparkSqlTestBase.{getLastCleanMetadata, getLastCommitMetadata}
import org.junit.jupiter.api.Assertions
import org.junit.jupiter.api.Assertions.assertTrue

Expand Down Expand Up @@ -137,6 +139,9 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase {
spark.sql(s"call run_clean(table => '$tableName', retain_commits => 1)")
.collect()

val cleanMetadata: HoodieCleanMetadata = getLastCleanMetadata(spark, tablePath)
assertTrue(cleanMetadata.totalFilesDeleted > 0)

val partitionPath = if (urlencode) {
PartitionPathEncodeUtils.escapePathName("2021/10/01")
} else {
Expand Down Expand Up @@ -316,6 +321,9 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase {
spark.sql(s"call run_clean(table => '$tableName', retain_commits => 1)")
.collect()

val cleanMetadata: HoodieCleanMetadata = getLastCleanMetadata(spark, tablePath)
assertTrue(cleanMetadata.totalFilesDeleted > 0)

checkAnswer(s"select id, name, ts, year, month, day from $tableName")(
Seq(2, "l4", "v1", "2021", "10", "02")
)
Expand Down Expand Up @@ -384,6 +392,9 @@ class TestAlterTableDropPartition extends HoodieSparkSqlTestBase {
spark.sql(s"call run_clean(table => '$tableName', retain_commits => 1)")
.collect()

val cleanMetadata: HoodieCleanMetadata = getLastCleanMetadata(spark, tablePath)
assertTrue(cleanMetadata.totalFilesDeleted > 0)

// insert data
spark.sql(s"""insert into $tableName values (2, "l4", "v1", "2021", "10", "02")""")

Expand Down

0 comments on commit 9d90f97

Please sign in to comment.