diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java index 80cc56a4e579..9d1b0cbd8730 100644 --- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java +++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/HoodieMetadataTableValidator.java @@ -52,6 +52,7 @@ import org.apache.hudi.exception.HoodieException; import org.apache.hudi.exception.HoodieIOException; import org.apache.hudi.exception.HoodieValidationException; +import org.apache.hudi.exception.TableNotFoundException; import org.apache.hudi.io.storage.HoodieFileReader; import org.apache.hudi.io.storage.HoodieFileReaderFactory; import org.apache.hudi.metadata.HoodieTableMetadata; @@ -399,7 +400,9 @@ public void doMetadataTableValidation() { Set baseFilesForCleaning = Collections.emptySet(); // check metadata table is available to read. - checkMetadataTableIsAvailable(); + if (!checkMetadataTableIsAvailable()) { + return; + } if (cfg.skipDataFilesForCleaning) { HoodieTimeline inflightCleaningTimeline = metaClient.getActiveTimeline().getCleanerTimeline().filterInflights(); @@ -428,6 +431,12 @@ public void doMetadataTableValidation() { HoodieSparkEngineContext engineContext = new HoodieSparkEngineContext(jsc); List allPartitions = validatePartitions(engineContext, basePath); + + if (allPartitions.isEmpty()) { + LOG.warn("The result of getting all partitions is null or empty, skip current validation."); + return; + } + HoodieMetadataValidationContext metadataTableBasedContext = new HoodieMetadataValidationContext(engineContext, cfg, metaClient, true); HoodieMetadataValidationContext fsBasedContext = @@ -465,7 +474,7 @@ public void doMetadataTableValidation() { * Check metadata is initialized and available to ready. * If not we will log.warn and skip current validation. */ - private void checkMetadataTableIsAvailable() { + private boolean checkMetadataTableIsAvailable() { try { HoodieTableMetaClient mdtMetaClient = HoodieTableMetaClient.builder() .setConf(jsc.hadoopConfiguration()).setBasePath(new Path(cfg.basePath, HoodieTableMetaClient.METADATA_TABLE_FOLDER_PATH).toString()) @@ -473,10 +482,21 @@ private void checkMetadataTableIsAvailable() { .build(); int finishedInstants = mdtMetaClient.getActiveTimeline().filterCompletedInstants().countInstants(); if (finishedInstants == 0) { - throw new HoodieValidationException("There is no completed instant for metadata table."); + if (metaClient.getActiveTimeline().filterCompletedInstants().countInstants() == 0) { + LOG.info("There is no completed instant both in metadata table and corresponding data table."); + return false; + } else { + throw new HoodieValidationException("There is no completed instant for metadata table."); + } } + return true; + } catch (TableNotFoundException tbe) { + // Suppress the TableNotFound exception if Metadata table is not available to read for now + LOG.warn("Metadata table is not found. Skip current validation."); + return false; } catch (Exception ex) { - LOG.warn("Metadata table is not available to ready for now, ", ex); + LOG.warn("Metadata table is not available to read for now, ", ex); + return false; } }