Skip to content

Commit

Permalink
[HUDI-5253] HoodieMergeOnReadTableInputFormat could have duplicate records issue if it contains delta files while still splittable (apache#7264)
Browse files Browse the repository at this point in the history
  • Loading branch information
boneanxs authored and Alexey Kudinkin committed Dec 14, 2022
1 parent a78cb09 commit 4ccee72
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ public boolean getBelongsToIncrementalQuery() {
}

/**
 * Reports whether this path may be divided into multiple input splits.
 *
 * <p>The path is treated as splittable only when all of the following hold:
 * <ul>
 *   <li>its string form does not contain {@code ".log"};</li>
 *   <li>no delta log files are attached to it; and</li>
 *   <li>it does not include a bootstrap file path.</li>
 * </ul>
 *
 * <p>The delta-log check exists because, per HUDI-5253, a path that carries delta
 * files while still being splittable could produce duplicate records.
 *
 * @return {@code true} if the path can be split, {@code false} otherwise
 */
public boolean isSplitable() {
  return !toString().contains(".log") && deltaLogFiles.isEmpty() && !includeBootstrapFilePath();
}

public PathWithBootstrapFileStatus getPathWithBootstrapFileStatus() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.apache.hudi.hadoop.realtime;

import org.apache.hudi.common.model.HoodieLogFile;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.hadoop.PathWithBootstrapFileStatus;

Expand Down Expand Up @@ -65,4 +66,16 @@ void pathNotSplitableForBootstrapScenario() throws IOException {
rtPath.setPathWithBootstrapFileStatus(path);
assertFalse(new HoodieMergeOnReadTableInputFormat().isSplitable(fs, rtPath), "Path for bootstrap should not be splitable.");
}

/**
 * Verifies that a realtime path is splittable when it is built with an empty list of
 * log files, and stops being splittable once a log file is attached to it.
 */
@Test
void pathNotSplitableIfContainsDeltaFiles() throws IOException {
  // Scenario 1: a realtime path built with an empty log-file list.
  final String baseFileLocation = Files.createTempFile(tempDir, "target", ".parquet").toUri().toString();
  final HoodieRealtimePath baseOnlyPath = new HoodieRealtimePath(
      new Path("foo"), "bar", baseFileLocation, Collections.emptyList(), "000", false, Option.empty());
  final boolean baseOnlySplitable = new HoodieMergeOnReadTableInputFormat().isSplitable(fs, baseOnlyPath);
  assertTrue(baseOnlySplitable, "Path only contains the base file should be splittable");

  // Scenario 2: the same base file location, now accompanied by a single log file.
  final URI logFileLocation = Files.createTempFile(tempDir, ".test", ".log.4_1-149-180").toUri();
  final HoodieLogFile deltaLog = new HoodieLogFile(fs.getFileStatus(new Path(logFileLocation)));
  final HoodieRealtimePath pathWithDeltaLog = new HoodieRealtimePath(
      new Path("foo"), "bar", baseFileLocation, Collections.singletonList(deltaLog), "000", false, Option.empty());
  final boolean withLogSplitable = new HoodieMergeOnReadTableInputFormat().isSplitable(fs, pathWithDeltaLog);
  assertFalse(withLogSplitable, "Path contains log files should not be splittable.");
}
}

0 comments on commit 4ccee72

Please sign in to comment.