Spark 3.3 write to branch snapshot #6651

Closed
wants to merge 33 commits
Changes from 4 commits
Commits (33 total)
9e8bf34
Spark 3.3 write to branch
namrathamyske Jan 23, 2023
ee4cadb
Spark 3.3 write to branch refactoring by review comments
namrathamyske Jan 23, 2023
3225506
Spark 3.3 write to branch refactoring by review comments
namrathamyske Jan 23, 2023
e1dfa45
Spark 3.3 write to branch data write test
namrathamyske Jan 23, 2023
58b4bf2
spotless
namrathamyske Jan 24, 2023
8677134
checking if snapshot set is branch
namrathamyske Jan 24, 2023
af17f25
Merge branch 'master' of https://github.com/apache/iceberg into spark…
namrathamyske Jan 25, 2023
7642b9e
Spark: address comments for spark branch writes
amogh-jahagirdar Feb 1, 2023
da9dcc0
Merge commit 'refs/pull/25/head' of https://github.com/namrathamyske/…
namrathamyske Feb 4, 2023
ca8e1ff
Merge branch 'master' of https://github.com/apache/iceberg into spark…
namrathamyske Feb 7, 2023
2e4eefe
review comments
namrathamyske Feb 11, 2023
de20c76
review comments
namrathamyske Feb 11, 2023
85d7475
spotless
namrathamyske Feb 11, 2023
bbf57e3
review comments changes
namrathamyske Feb 12, 2023
0e081e1
review comments changes
namrathamyske Feb 12, 2023
51b1052
new line change reversal
namrathamyske Feb 12, 2023
aa42e2e
Spark: Add tests for overwrite case
amogh-jahagirdar Feb 12, 2023
03c962d
Merge pull request #26 from amogh-jahagirdar/spark-branch-writes-more…
namrathamyske Feb 17, 2023
bed5ec3
nit review comments
namrathamyske Feb 17, 2023
332064e
Merge branch 'master' of https://github.com/apache/iceberg into spark…
namrathamyske Feb 17, 2023
6ef5f4e
Merge branch 'spark_writes' of https://github.com/namrathamyske/icebe…
namrathamyske Feb 17, 2023
8ecfdcd
adding write conf back
namrathamyske Feb 17, 2023
6b8f954
Remove SQL Write Conf, fail if write conf is specified for row level …
amogh-jahagirdar Feb 22, 2023
f8b34bd
Merge branch 'master' into spark_writes
amogh-jahagirdar Feb 22, 2023
a8a5d89
Merge branch 'master' into spark_writes
amogh-jahagirdar Feb 22, 2023
7ee1689
Address cleanup
amogh-jahagirdar Feb 23, 2023
64db07e
Allow non-existing branches in catalog#loadTable
amogh-jahagirdar Feb 23, 2023
1b2cd5a
Merge branch 'master' into spark_writes
amogh-jahagirdar Feb 23, 2023
4c94693
Remove Spark branch write option, use identifier in branch, merge/del…
amogh-jahagirdar Feb 26, 2023
2f3d6e1
Add merge tests
amogh-jahagirdar Feb 27, 2023
9bbed3a
Style
amogh-jahagirdar Feb 27, 2023
51a29b3
Remove setting branch in scan
amogh-jahagirdar Feb 27, 2023
b2692fe
Fix for metadata tables
amogh-jahagirdar Feb 27, 2023
SparkWriteConf.java
@@ -27,6 +27,7 @@
import org.apache.iceberg.DistributionMode;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.IsolationLevel;
import org.apache.iceberg.SnapshotRef;
import org.apache.iceberg.SnapshotSummary;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
@@ -304,4 +305,9 @@ public boolean caseSensitive() {
.defaultValue(SQLConf.CASE_SENSITIVE().defaultValueString())
.parse();
}

public String branch() {
return confParser
.stringConf()
.option(SparkWriteOptions.BRANCH)
.defaultValue(SnapshotRef.MAIN_BRANCH)
.parse();
}

}
SparkWriteOptions.java
@@ -77,4 +77,6 @@ private SparkWriteOptions() {}

// Isolation Level for DataFrame calls. Currently supported by overwritePartitions
public static final String ISOLATION_LEVEL = "isolation-level";
// Branch to write to
public static final String BRANCH = "branch";
}
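For orientation, a minimal sketch of how the write option added above would be used from the DataFrame API; `df` and `location` are assumed to exist, and the branch name is illustrative rather than taken from this PR:

```java
// Sketch only: append df to a branch via the new write option
// (SparkWriteOptions.BRANCH resolves to the string "branch").
df.write()
    .format("iceberg")
    .option(SparkWriteOptions.WRITE_FORMAT, "parquet")
    .option(SparkWriteOptions.BRANCH, "testBranch")
    .mode(SaveMode.Append)
    .save(location.toString());
```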
SparkTable.java
@@ -247,9 +247,6 @@ public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) {

@Override
public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {
Preconditions.checkArgument(
snapshotId == null, "Cannot write to table at a specific snapshot: %s", snapshotId);
Contributor: Why is this no longer valid? I think that we do not want to write to a specific snapshot. Is the branch somehow passed as the snapshot ID?

Contributor: After looking into this a bit more, I think this is incorrect. The snapshotId is set when the table is loaded using time-travel syntax, and I don't think we want to allow that.

namrathamyske (PR author), Jan 24, 2023: @rdblue Can we add a check that if the snapshot ID is the tip of the branch, then writing to the branch is supported? If it is the tip of the branch, the Spark write should be allowed.

I believe that when we do spark...save(table), we are calling catalog.loadTable(ident) in DataFrameWriter, and when spark..option("branch", "..") is passed, snapshotId() gets set.

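A minimal sketch of the check proposed in this comment (hypothetical, not code from this diff), assuming it would sit where the removed precondition was in SparkTable and that a `branch` variable holds the requested write branch:

```java
// Hypothetical: allow the write only when no snapshot is pinned, or when the
// pinned snapshot is the current tip of the branch being written to.
SnapshotRef ref = branch != null ? icebergTable.refs().get(branch) : null;
boolean writesBranchTip =
    snapshotId != null && ref != null && ref.isBranch() && snapshotId == ref.snapshotId();
Preconditions.checkArgument(
    snapshotId == null || writesBranchTip,
    "Cannot write to table at a specific snapshot: %s",
    snapshotId);
```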
Contributor: Looks like this isn't an issue. I reverted this change and ran TestSparkDataWrite and everything passes. Let's revert this and run CI; if there are other issues outside of that test class, I'll take a look.

namrathamyske (PR author), Feb 1, 2023: @rdblue @amogh-jahagirdar if the bug fix for reading by snapshot ref (#6717) gets merged, then writing to a branch snapshot will fail, as shown by TestDeleteFrom.java. That's because of the above condition. I feel we have to tweak the condition if it is going to stay.

amogh-jahagirdar (Contributor), Feb 7, 2023: Actually, it seems the issue is that catalog.loadTable(table) interprets the branch option as the branch read option (both are called "branch", and since we have to load the table before doing the write, it can't tell whether the option is meant for the read or the write). Couldn't we just use a different config name when doing writes?

amogh-jahagirdar (Contributor), Feb 7, 2023: @namrathamyske Yeah, I just updated it to use the name write-branch and the tests are passing. The issue is that the name 'branch' is used for both the read and write options, and when loadTable is performed as part of the write, it is treated as time travel; we should disambiguate the two. I think we should call it something else for the write case. write-branch sounds a bit odd to me, to be honest; maybe we go with toBranch, which would be consistent with the API and with what's being done in the Flink PR. We don't necessarily need parity there, though; whatever fits the Spark naming convention and makes sense for users. @aokolnychyi @rdblue any suggestions?

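To make the naming collision concrete, a sketch of the two call sites that currently share the option name "branch"; `spark`, `df`, and `location` are assumed, and the branch value is illustrative:

```java
// Read path: time-travels the loaded table to the branch head.
Dataset<Row> rows =
    spark.read().format("iceberg").option("branch", "b1").load(location.toString());

// Write path (at this point in the review): intends to commit to the branch, but the
// same "branch" key is also seen by catalog.loadTable while setting up the write.
df.write().format("iceberg").option("branch", "b1").mode(SaveMode.Append).save(location.toString());
```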
namrathamyske (PR author), Feb 7, 2023: But I think we can't disregard calling loadTable with respect to the ref that is passed. Later, when we implement session configs for testing INSERT and DELETE operations, there is a lot of overlap between read and write. Spark logical plans call SparkScanBuilder, which should use the read time-travel config; SparkCopyOnWrite and SparkMergeOnRead have their respective scan builders, which inherit from SparkScanBuilder. I will include the changes in this PR. It's still WIP.

Contributor: Good point @namrathamyske, I was a bit short-sighted; we actually do want to leverage the statistics for the specific snapshot for writes, since those statistics would be used during the scan itself (for example, MERGE INTO a branch). So either we 1) find a good way to differentiate between a time-travel query, where the write should not be applied, and an intentional write to a branch, or 2) we just relax the check that a snapshot is set, as you did earlier.

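For reference, a rough sketch of what reading a branch's state looks like at the Iceberg table API level, so file pruning and statistics reflect the branch head rather than main; the class and method names below are illustrative and not code from this PR:

```java
import org.apache.iceberg.CombinedScanTask;
import org.apache.iceberg.Table;
import org.apache.iceberg.io.CloseableIterable;

class BranchScanSketch {
  // Plan a scan against the head snapshot of a named branch instead of main.
  static CloseableIterable<CombinedScanTask> planBranchScan(Table table, String branch) {
    return table.newScan().useRef(branch).planTasks();
  }
}
```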
namrathamyske (PR author): @rdblue @amogh-jahagirdar @jackye1995 this is still an open item for getting this PR merged. I would prefer to go with the second option, but let me know otherwise!


return new SparkWriteBuilder(sparkSession(), icebergTable, info);
}

SparkWrite.java
@@ -114,6 +114,8 @@ abstract class SparkWrite implements Write, RequiresDistributionAndOrdering {
private final SortOrder[] requiredOrdering;

private boolean cleanupOnAbort = true;
private String branch;


SparkWrite(
SparkSession spark,
@@ -140,6 +142,7 @@ abstract class SparkWrite implements Write, RequiresDistributionAndOrdering {
this.partitionedFanoutEnabled = writeConf.fanoutWriterEnabled();
this.requiredDistribution = requiredDistribution;
this.requiredOrdering = requiredOrdering;
this.branch = writeConf.branch();
}

@Override
@@ -290,7 +293,8 @@ public String toString() {
private class BatchAppend extends BaseBatchWrite {
@Override
public void commit(WriterCommitMessage[] messages) {
AppendFiles append = table.newAppend();
AppendFiles append = table.newAppend().toBranch(branch);


int numFiles = 0;
for (DataFile file : files(messages)) {
@@ -312,7 +316,8 @@ public void commit(WriterCommitMessage[] messages) {
return;
}

ReplacePartitions dynamicOverwrite = table.newReplacePartitions();
ReplacePartitions dynamicOverwrite = table.newReplacePartitions().toBranch(branch);

IsolationLevel isolationLevel = writeConf.isolationLevel();
Long validateFromSnapshotId = writeConf.validateFromSnapshotId();

@@ -349,8 +354,7 @@ private OverwriteByFilter(Expression overwriteExpr) {

@Override
public void commit(WriterCommitMessage[] messages) {
OverwriteFiles overwriteFiles = table.newOverwrite();
overwriteFiles.overwriteByRowFilter(overwriteExpr);
OverwriteFiles overwriteFiles = table.newOverwrite().toBranch(branch).overwriteByRowFilter(overwriteExpr);

int numFiles = 0;
for (DataFile file : files(messages)) {
@@ -411,7 +415,7 @@ private Expression conflictDetectionFilter() {

@Override
public void commit(WriterCommitMessage[] messages) {
OverwriteFiles overwriteFiles = table.newOverwrite();
OverwriteFiles overwriteFiles = table.newOverwrite().toBranch(branch);

List<DataFile> overwrittenFiles = overwrittenFiles();
int numOverwrittenFiles = overwrittenFiles.size();
@@ -536,7 +540,7 @@ protected <T> void commit(SnapshotUpdate<T> snapshotUpdate, long epochId, String
}

private Long findLastCommittedEpochId() {
Snapshot snapshot = table.currentSnapshot();
Snapshot snapshot = table.snapshot(branch);
Long lastCommittedEpochId = null;
while (snapshot != null) {
Map<String, String> summary = snapshot.summary();
@@ -570,7 +574,7 @@ protected String mode() {

@Override
protected void doCommit(long epochId, WriterCommitMessage[] messages) {
AppendFiles append = table.newFastAppend();
AppendFiles append = table.newFastAppend().toBranch(branch);
int numFiles = 0;
for (DataFile file : files(messages)) {
append.appendFile(file);
@@ -588,7 +592,7 @@ protected String mode() {

@Override
public void doCommit(long epochId, WriterCommitMessage[] messages) {
OverwriteFiles overwriteFiles = table.newOverwrite();
OverwriteFiles overwriteFiles = table.newOverwrite().toBranch(branch);
overwriteFiles.overwriteByRowFilter(Expressions.alwaysTrue());
int numFiles = 0;
for (DataFile file : files(messages)) {
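For reference, a minimal sketch of what the toBranch(branch) calls above do at the Iceberg API level; the class and helper names here are illustrative and not part of this diff:

```java
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

class BranchAppendSketch {
  // Commit an append to a named branch: a new snapshot is created and only the
  // branch ref is advanced to it, while main is left untouched.
  static void appendToBranch(Table table, DataFile file, String branch) {
    AppendFiles append = table.newAppend().toBranch(branch);
    append.appendFile(file);
    append.commit();
  }
}
```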
TestSparkDataWrite.java
@@ -71,9 +71,18 @@ public class TestSparkDataWrite {

@Rule public TemporaryFolder temp = new TemporaryFolder();

@Parameterized.Parameters(name = "format = {0}")
private String branch;

@Parameterized.Parameters(name = "format = {0}, branch = {1}")
public static Object[] parameters() {
return new Object[] {"parquet", "avro", "orc"};
return new Object[][] {
new Object[] {"parquet", "main"},
new Object[] {"parquet", "testBranch"},
new Object[] {"avro", "main"},
new Object[] {"avro", "testBranch"},
new Object[] {"orc", "main"},
new Object[] {"orc", "testBranch"},
};
}

@BeforeClass
@@ -93,8 +102,9 @@ public static void stopSpark() {
currentSpark.stop();
}

public TestSparkDataWrite(String format) {
public TestSparkDataWrite(String format, String branch) {
this.format = FileFormat.fromString(format);
this.branch = branch;
}

@Test
@@ -117,17 +127,18 @@ public void testBasicWrite() throws IOException {
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
Assert.assertEquals("Result rows should match", expected, actual);
for (ManifestFile manifest : table.currentSnapshot().allManifests(table.io())) {
for (ManifestFile manifest : table.snapshot(branch).allManifests(table.io())) {
for (DataFile file : ManifestFiles.read(manifest, table.io())) {
// TODO: avro not support split
if (!format.equals(FileFormat.AVRO)) {
@@ -175,6 +186,7 @@ public void testAppend() throws IOException {
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.option("branch", branch)
.save(location.toString());

df.withColumn("id", df.col("id").plus(3))
@@ -183,11 +195,12 @@
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
@@ -216,7 +229,8 @@ public void testEmptyOverwrite() throws IOException {
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

Dataset<Row> empty = spark.createDataFrame(ImmutableList.of(), SimpleRecord.class);
empty
Expand All @@ -226,11 +240,12 @@ public void testEmptyOverwrite() throws IOException {
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Overwrite)
.option("overwrite-mode", "dynamic")
.save(location.toString());
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
@@ -266,7 +281,8 @@ public void testOverwrite() throws IOException {
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

// overwrite with 2*id to replace record 2, append 4 and 6
df.withColumn("id", df.col("id").multiply(2))
@@ -276,11 +292,12 @@
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Overwrite)
.option("overwrite-mode", "dynamic")
.save(location.toString());
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
@@ -308,19 +325,21 @@ public void testUnpartitionedOverwrite() throws IOException {
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

// overwrite with the same data; should not produce two copies
df.select("id", "data")
.write()
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Overwrite)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
@@ -354,19 +373,20 @@ public void testUnpartitionedCreateWithTargetFileSizeViaTableProperties() throws
.format("iceberg")
.option(SparkWriteOptions.WRITE_FORMAT, format.toString())
.mode(SaveMode.Append)
.save(location.toString());
.option("branch", branch)
.save(location.toString());

table.refresh();

Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
Dataset<Row> result = spark.read().format("iceberg").option("branch", branch).load(location.toString());

List<SimpleRecord> actual =
result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
Assert.assertEquals("Result rows should match", expected, actual);

List<DataFile> files = Lists.newArrayList();
for (ManifestFile manifest : table.currentSnapshot().allManifests(table.io())) {
for (ManifestFile manifest : table.snapshot(branch).allManifests(table.io())) {
for (DataFile file : ManifestFiles.read(manifest, table.io())) {
files.add(file);
}
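As a side note, a branch such as testBranch used by these parameterized tests could be created up front with Iceberg's snapshot management API. A minimal sketch, assuming the table already has a current snapshot; the class name is illustrative:

```java
import org.apache.iceberg.Table;

class CreateBranchSketch {
  // Create a branch pointing at the table's current snapshot.
  static void createBranch(Table table, String branch) {
    table.manageSnapshots()
        .createBranch(branch, table.currentSnapshot().snapshotId())
        .commit();
  }
}
```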