Skip to content

Commit

Permalink
Disallow full-scans for "column_stats", "bloom_filters" partitions
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexey Kudinkin committed Apr 5, 2022
1 parent 3193524 commit 085b13e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ protected String getPayloadClassFQN() {
return payloadClassFQN;
}

protected Option<String> getPartitionName() {
public Option<String> getPartitionName() {
return partitionName;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@
import static org.apache.hudi.common.util.CollectionUtils.isNullOrEmpty;
import static org.apache.hudi.common.util.CollectionUtils.toStream;
import static org.apache.hudi.common.util.ValidationUtils.checkArgument;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_BLOOM_FILTERS;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_COLUMN_STATS;
import static org.apache.hudi.metadata.HoodieTableMetadataUtil.PARTITION_NAME_FILES;

/**
* Table metadata provided by an internal DFS backed Hudi metadata table.
Expand Down Expand Up @@ -236,7 +239,8 @@ private Map<String, Option<HoodieRecord<HoodieMetadataPayload>>> readLogRecords(
// Retrieve records from log file
timer.startTimer();
if (logRecordScanner != null) {
if (metadataConfig.allowFullScan()) {
String partitionName = logRecordScanner.getPartitionName().get();
if (isFullScanAllowedForPartition(partitionName)) {
checkArgument(fullKey, "If full-scan is required, only full keys could be used!");
// Path which does full scan of log files
for (String key : keys) {
Expand Down Expand Up @@ -507,6 +511,8 @@ public Pair<HoodieMetadataMergedLogRecordReader, Long> getLogRecordScanner(List<
Option<HoodieInstant> latestMetadataInstant = metadataMetaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
String latestMetadataInstantTime = latestMetadataInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);

boolean allowFullScan = isFullScanAllowedForPartition(partitionName);

// Load the schema
Schema schema = HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
HoodieCommonConfig commonConfig = HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
Expand All @@ -522,7 +528,7 @@ public Pair<HoodieMetadataMergedLogRecordReader, Long> getLogRecordScanner(List<
.withDiskMapType(commonConfig.getSpillableDiskMapType())
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
.withLogBlockTimestamps(validInstantTimestamps)
.allowFullScan(metadataConfig.allowFullScan())
.allowFullScan(allowFullScan)
.withPartition(partitionName)
.build();

Expand All @@ -532,6 +538,21 @@ public Pair<HoodieMetadataMergedLogRecordReader, Long> getLogRecordScanner(List<
return Pair.of(logRecordScanner, logScannerOpenMs);
}

/**
 * Decides whether the log files of the given metadata table partition may be
 * eagerly full-scanned.
 *
 * <p>Eager full-scan is permitted only for the "files" partition, and even then
 * only when {@code metadataConfig.allowFullScan()} returns {@code true}. Every
 * other partition (such as "column_stats" or "bloom_filters", as well as any
 * name not recognized here) is answered with {@code false} and therefore has
 * to be fetched through point-lookups.
 *
 * @param partitionName name of the metadata table partition being read;
 *                      a {@code null} value results in a {@link NullPointerException}
 * @return {@code true} if full-scan of the partition's log files is allowed,
 *         {@code false} otherwise
 */
private boolean isFullScanAllowedForPartition(String partitionName) {
    // Anything that is not the "files" partition is never full-scanned,
    // regardless of what the metadata config says
    if (!partitionName.equals(PARTITION_NAME_FILES)) {
        return false;
    }

    // For the "files" partition the decision is delegated to the config
    return metadataConfig.allowFullScan();
}

/**
* Returns a list of commits which were rolled back as part of a Rollback or Restore operation.
*
Expand Down

0 comments on commit 085b13e

Please sign in to comment.