Skip to content

Commit

Permalink
[HUDI-3879] Suppress exceptions that are not fatal in HoodieMetadataTableValidator (apache#5344)
Browse files Browse the repository at this point in the history

Co-authored-by: yuezhang <yuezhang@freewheel.tv>
Co-authored-by: Y Ethan Guo <ethan.guoyihua@gmail.com>
  • Loading branch information
3 people authored Sep 13, 2022
1 parent a942d9d commit 1b21792
Showing 1 changed file with 24 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.apache.hudi.exception.HoodieException;
import org.apache.hudi.exception.HoodieIOException;
import org.apache.hudi.exception.HoodieValidationException;
import org.apache.hudi.exception.TableNotFoundException;
import org.apache.hudi.io.storage.HoodieFileReader;
import org.apache.hudi.io.storage.HoodieFileReaderFactory;
import org.apache.hudi.metadata.HoodieTableMetadata;
Expand Down Expand Up @@ -399,7 +400,9 @@ public void doMetadataTableValidation() {
Set<String> baseFilesForCleaning = Collections.emptySet();

// check metadata table is available to read.
checkMetadataTableIsAvailable();
if (!checkMetadataTableIsAvailable()) {
return;
}

if (cfg.skipDataFilesForCleaning) {
HoodieTimeline inflightCleaningTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterInflights();
Expand Down Expand Up @@ -428,6 +431,12 @@ public void doMetadataTableValidation() {

HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc);
List<String> allPartitions = validatePartitions(engineContext, basePath);

if (allPartitions.isEmpty()) {
LOG.warn("The result of getting all partitions is null or empty, skip current validation.");
return;
}

HoodieMetadataValidationContext metadataTableBasedContext =
new HoodieMetadataValidationContext(engineContext, cfg, metaClient, true);
HoodieMetadataValidationContext fsBasedContext =
Expand Down Expand Up @@ -465,18 +474,29 @@ public void doMetadataTableValidation() {
* Check that the metadata table is initialized and available to read.
* If not, we log a warning and skip the current validation.
*/
private void checkMetadataTableIsAvailable() {
private boolean checkMetadataTableIsAvailable() {
try {
HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder()
.setConf(jsc.hadoopConfiguration()).setBasePath(new Path(cfg.basePath, HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH).toString())
.setLoadActiveTimelineOnLoad(true)
.build();
int finishedInstants = mdtMetaClient.getActiveTimeline().filterCompletedInstants().countInstants();
if (finishedInstants == 0) {
throw new HoodieValidationException("There is no completed instant for metadata table.");
if (metaClient.getActiveTimeline().filterCompletedInstants().countInstants() == 0) {
LOG.info("There is no completed instant both in metadata table and corresponding data table.");
return false;
} else {
throw new HoodieValidationException("There is no completed instant for metadata table.");
}
}
return true;
} catch (TableNotFoundException tbe) {
// Suppress the TableNotFound exception if Metadata table is not available to read for now
LOG.warn("Metadata table is not found. Skip current validation.");
return false;
} catch (Exception ex) {
LOG.warn("Metadata table is not available to ready for now, ", ex);
LOG.warn("Metadata table is not available to read for now, ", ex);
return false;
}
}

Expand Down

0 comments on commit 1b21792

Please sign in to comment.