From 35a73dfff643c828fe8765278a22bb9a2d7f25c6 Mon Sep 17 00:00:00 2001
From: Scott Sandre
Date: Fri, 22 Mar 2024 14:02:39 -0700
Subject: [PATCH] get ColumnWithDefaultExprUtils to x-compile

---
 .../delta/ColumnWithDefaultExprUtils.scala    | 14 ++-----
 .../spark-3.5/IncrementalExecutionShim.scala  | 38 ++++++++++++++++++
 .../spark-4.0/IncrementalExecutionShim.scala  | 40 +++++++++++++++++++
 3 files changed, 81 insertions(+), 11 deletions(-)
 create mode 100644 spark/src/shims/spark-3.5/IncrementalExecutionShim.scala
 create mode 100644 spark/src/shims/spark-4.0/IncrementalExecutionShim.scala

diff --git a/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala b/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala
index 55b3e65878..f29fb2ddc3 100644
--- a/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala
+++ b/spark/src/main/scala/org/apache/spark/sql/delta/ColumnWithDefaultExprUtils.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.EqualNullSafe
 import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
 import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns._
 import org.apache.spark.sql.execution.QueryExecution
-import org.apache.spark.sql.execution.streaming.{IncrementalExecution, StreamExecution}
+import org.apache.spark.sql.execution.streaming.{IncrementalExecution, IncrementalExecutionShim, StreamExecution}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType}

@@ -210,18 +210,10 @@ object ColumnWithDefaultExprUtils extends DeltaLogging {
       df: DataFrame,
       cols: Column*): DataFrame = {
     val newMicroBatch = df.select(cols: _*)
-    val newIncrementalExecution = new IncrementalExecution(
+    val newIncrementalExecution = IncrementalExecutionShim.newInstance(
       newMicroBatch.sparkSession,
       newMicroBatch.queryExecution.logical,
-      incrementalExecution.outputMode,
-      incrementalExecution.checkpointLocation,
-      incrementalExecution.queryId,
-      incrementalExecution.runId,
-      incrementalExecution.currentBatchId,
-      incrementalExecution.prevOffsetSeqMetadata,
-      incrementalExecution.offsetSeqMetadata,
-      incrementalExecution.watermarkPropagator
-    )
+      incrementalExecution)
     newIncrementalExecution.executedPlan // Force the lazy generation of execution plan

diff --git a/spark/src/shims/spark-3.5/IncrementalExecutionShim.scala b/spark/src/shims/spark-3.5/IncrementalExecutionShim.scala
new file mode 100644
index 0000000000..a1b555e0d3
--- /dev/null
+++ b/spark/src/shims/spark-3.5/IncrementalExecutionShim.scala
@@ -0,0 +1,38 @@
+/*
+ * Copyright (2024) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+object IncrementalExecutionShim {
+  def newInstance(
+      sparkSession: SparkSession,
+      logicalPlan: LogicalPlan,
+      incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution(
+    sparkSession,
+    logicalPlan,
+    incrementalExecution.outputMode,
+    incrementalExecution.checkpointLocation,
+    incrementalExecution.queryId,
+    incrementalExecution.runId,
+    incrementalExecution.currentBatchId,
+    incrementalExecution.prevOffsetSeqMetadata,
+    incrementalExecution.offsetSeqMetadata,
+    incrementalExecution.watermarkPropagator
+  )
+}
diff --git a/spark/src/shims/spark-4.0/IncrementalExecutionShim.scala b/spark/src/shims/spark-4.0/IncrementalExecutionShim.scala
new file mode 100644
index 0000000000..67f14b7f85
--- /dev/null
+++ b/spark/src/shims/spark-4.0/IncrementalExecutionShim.scala
@@ -0,0 +1,40 @@
+/*
+ * Copyright (2024) The Delta Lake Project Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+
+object IncrementalExecutionShim {
+  def newInstance(
+      sparkSession: SparkSession,
+      logicalPlan: LogicalPlan,
+      incrementalExecution: IncrementalExecution): IncrementalExecution = new IncrementalExecution(
+    sparkSession,
+    logicalPlan,
+    incrementalExecution.outputMode,
+    incrementalExecution.checkpointLocation,
+    incrementalExecution.queryId,
+    incrementalExecution.runId,
+    incrementalExecution.currentBatchId,
+    incrementalExecution.prevOffsetSeqMetadata,
+    incrementalExecution.offsetSeqMetadata,
+    incrementalExecution.watermarkPropagator,
+    incrementalExecution.isFirstBatch // Spark 4.0 API
+  )
+
+}
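
Note (not part of the patch): both shims expose the same IncrementalExecutionShim.newInstance
signature, so the call site in ColumnWithDefaultExprUtils compiles unchanged against either
Spark version; only the shim source directory placed on the compile path differs. A minimal
sbt sketch of how that selection could be wired, assuming a spark.version-style switch; the
directory names mirror the patch, but the exact setting in Delta's build is not shown here
and may differ:

  // Illustrative only: put exactly one shim directory on the compile path,
  // chosen by the Spark version the build targets (assumed system property).
  Compile / unmanagedSourceDirectories += {
    val sparkVersion = sys.props.getOrElse("spark.version", "3.5.0")
    val shimDir = if (sparkVersion.startsWith("4.")) "spark-4.0" else "spark-3.5"
    baseDirectory.value / "src" / "shims" / shimDir
  }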