Skip to content

Commit

Permalink
fix show partitions check
Browse files Browse the repository at this point in the history
  • Loading branch information
XuQianJin-Stars authored and codope committed Mar 31, 2022
1 parent 4d16ddf commit 5acbedb
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,22 @@

package org.apache.hudi.table.action.clean;

import java.io.IOException;
import java.io.Serializable;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hudi.avro.model.HoodieCleanMetadata;
import org.apache.hudi.avro.model.HoodieSavepointMetadata;
import org.apache.hudi.common.engine.HoodieEngineContext;
import org.apache.hudi.common.fs.FSUtils;
import org.apache.hudi.common.model.CleanFileInfo;
import org.apache.hudi.common.model.CompactionOperation;
import org.apache.hudi.common.model.FileSlice;
Expand All @@ -45,24 +57,11 @@
import org.apache.hudi.config.HoodieWriteConfig;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieSavepointException;
import org.apache.hudi.metadata.FileSystemBackedTableMetadata;
import org.apache.hudi.table.HoodieTable;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import java.io.IOException;
import java.io.Serializable;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Cleaner is responsible for garbage collecting older files in a given partition path. Such that
* <p>
Expand Down Expand Up @@ -206,7 +205,15 @@ private List<String> getPartitionPathsForIncrementalCleaning(HoodieCleanMetadata
*/
private List<String> getPartitionPathsForFullCleaning() {
// Go to brute force mode of scanning all partitions
return FSUtils.getAllPartitionPaths(context, config.getMetadataConfig(), config.getBasePath());
try {
// Because the partition of BaseTableMetadata has been deleted,
// all partition information can only be obtained from FileSystemBackedTableMetadata.
FileSystemBackedTableMetadata fsBackedTableMetadata = new FileSystemBackedTableMetadata(context,
context.getHadoopConf(), config.getBasePath(), config.shouldAssumeDatePartitioning());
return fsBackedTableMetadata.getAllPartitionPaths();
} catch (IOException e) {
return Collections.emptyList();
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,23 @@ public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List
return new HoodieAvroRecord<>(key, payload);
}

/**
* Create and return a {@code HoodieMetadataPayload} to save list of partitions.
*
* @param partitionsAdded The list of added partitions
* @param partitionsDeleted The list of deleted partitions
*/
public static HoodieRecord<HoodieMetadataPayload> createPartitionListRecord(List<String> partitionsAdded, List<String> partitionsDeleted) {
Map<String, HoodieMetadataFileInfo> fileInfo = new HashMap<>();
partitionsAdded.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, false)));
partitionsDeleted.forEach(partition -> fileInfo.put(partition, new HoodieMetadataFileInfo(0L, true)));

HoodieKey key = new HoodieKey(RECORDKEY_PARTITION_LIST, MetadataPartitionType.FILES.getPartitionPath());
HoodieMetadataPayload payload = new HoodieMetadataPayload(key.getRecordKey(), METADATA_TYPE_PARTITION_LIST,
fileInfo);
return new HoodieAvroRecord<>(key, payload);
}

/**
* Create and return a {@code HoodieMetadataPayload} to save list of files within a partition.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,35 @@

package org.apache.hudi.metadata;

import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields;
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT;
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
Expand All @@ -38,7 +67,9 @@
import org.apache.hudi.common.model.HoodieDeltaWriteStat;
import org.apache.hudi.common.model.HoodieFileFormat;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.model.HoodieReplaceCommitMetadata;
import org.apache.hudi.common.model.HoodieWriteStat;
import org.apache.hudi.common.model.WriteOperationType;
import org.apache.hudi.common.table.HoodieTableConfig;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.common.table.TableSchemaResolver;
Expand All @@ -61,37 +92,6 @@
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.avro.HoodieAvroUtils.addMetadataFields;
import static org.apache.hudi.avro.HoodieAvroUtils.getNestedFieldValAsString;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.COLUMN_RANGE_MERGE_FUNCTION;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MAX;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.MIN;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.NULL_COUNT;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_SIZE;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.TOTAL_UNCOMPRESSED_SIZE;
import static org.apache.hudi.common.model.HoodieColumnRangeMetadata.Stats.VALUE_COUNT;
import static org.apache.hudi.common.util.StringUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.metadata.HoodieTableMetadata.EMPTY_PARTITION_NAME;
import static org.apache.hudi.metadata.HoodieTableMetadata.NON_PARTITIONED_NAME;

/**
* A utility to convert timeline information to metadata table records.
*/
Expand Down Expand Up @@ -163,7 +163,10 @@ public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCo
// Add record bearing added partitions list
List<String> partitionsAdded = new ArrayList<>(commitMetadata.getPartitionToWriteStats().keySet());

records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded));
// Add record bearing deleted partitions list
List<String> partitionsDeleted = getPartitionsDeleted(commitMetadata);

records.add(HoodieMetadataPayload.createPartitionListRecord(partitionsAdded, partitionsDeleted));

// Update files listing records for each individual partition
List<HoodieRecord<HoodieMetadataPayload>> updatedPartitionFilesRecords =
Expand Down Expand Up @@ -213,6 +216,18 @@ public static List<HoodieRecord> convertMetadataToFilesPartitionRecords(HoodieCo
return records;
}

private static ArrayList<String> getPartitionsDeleted(HoodieCommitMetadata commitMetadata) {
if (commitMetadata instanceof HoodieReplaceCommitMetadata
&& WriteOperationType.DELETE_PARTITION.equals(commitMetadata.getOperationType())) {
Map<String, List<String>> partitionToReplaceFileIds =
((HoodieReplaceCommitMetadata) commitMetadata).getPartitionToReplaceFileIds();
if (!partitionToReplaceFileIds.isEmpty()) {
return new ArrayList<>(partitionToReplaceFileIds.keySet());
}
}
return new ArrayList<>();
}

/**
* Convert commit action metadata to bloom filter records.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,15 +120,12 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02"))
assertResult(true)(existsPath(s"${tmp.getCanonicalPath}/$tableName/$partitionPath"))

// TODO (HUDI-3135): These validations are failing. Due to lazy deletion,
// cleaner will delete the partition when it kicks in, however the replacecommit get written in the timeline.
// We should check why FSUtils#getAllPartitionPaths does not exclude replaced filegroups.
// show partitions
/*if (urlencode) {
if (urlencode) {
checkAnswer(s"show partitions $tableName")(Seq(PartitionPathEncodeUtils.escapePathName("2021/10/02")))
} else {
checkAnswer(s"show partitions $tableName")(Seq("2021/10/02"))
}*/
}
}
}
}
Expand Down Expand Up @@ -174,15 +171,12 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
checkAnswer(s"select dt from $tableName")(Seq(s"2021/10/02"))
assertResult(false)(existsPath(s"${tmp.getCanonicalPath}/$tableName/$partitionPath"))

// TODO (HUDI-3135): These validations are failing. Due to lazy deletion,
// cleaner will delete the partition when it kicks in, however the replacecommit get written in the timeline.
// We should check why FSUtils#getAllPartitionPaths does not exclude replaced filegroups.
// show partitions
/*if (urlencode) {
if (urlencode) {
checkAnswer(s"show partitions $tableName")(Seq(PartitionPathEncodeUtils.escapePathName("2021/10/02")))
} else {
checkAnswer(s"show partitions $tableName")(Seq("2021/10/02"))
}*/
}
}
}
}
Expand Down Expand Up @@ -217,11 +211,8 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {

checkAnswer(s"select id, name, ts, dt from $tableName")(Seq(2, "l4", "v1", "2021-10-02"))

// TODO (HUDI-3135): These validations are failing. Due to lazy deletion,
// cleaner will delete the partition when it kicks in, however the replacecommit get written in the timeline.
// We should check why FSUtils#getAllPartitionPaths does not exclude replaced filegroups.
// show partitions
// checkAnswer(s"show partitions $tableName")(Seq("dt=2021-10-02"))
checkAnswer(s"show partitions $tableName")(Seq("dt=2021-10-02"))
}

Seq(false, true).foreach { hiveStyle =>
Expand Down Expand Up @@ -265,15 +256,12 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
Seq(2, "l4", "v1", "2021", "10", "02")
)

// TODO (HUDI-3135): These validations are failing. Due to lazy deletion,
// cleaner will delete the partition when it kicks in, however the replacecommit gets written in the timeline.
// We should check why FSUtils#getAllPartitionPaths does not exclude replaced filegroups.
// show partitions
/*if (hiveStyle) {
if (hiveStyle) {
checkAnswer(s"show partitions $tableName")(Seq("year=2021/month=10/day=02"))
} else {
checkAnswer(s"show partitions $tableName")(Seq("2021/10/02"))
}*/
}
}
}
}
Expand Down Expand Up @@ -317,15 +305,12 @@ class TestAlterTableDropPartition extends TestHoodieSqlBase {
assertResult(false)(existsPath(
s"${tmp.getCanonicalPath}/$tableName/year=2021/month=10/day=01"))

// TODO (HUDI-3135): These validations are failing. Due to lazy deletion,
// cleaner will delete the partition when it kicks in, however the replacecommit get written in the timeline.
// We should check why FSUtils#getAllPartitionPaths does not exclude replaced filegroups.
// show partitions
/*if (hiveStyle) {
if (hiveStyle) {
checkAnswer(s"show partitions $tableName")(Seq("year=2021/month=10/day=02"))
} else {
checkAnswer(s"show partitions $tableName")(Seq("2021/10/02"))
}*/
}
}
}
}
Expand Down

0 comments on commit 5acbedb

Please sign in to comment.