NVIDIA · jlowe · Sep 22, 2023 · Sep 19, 2023 · Sep 19, 2023 · Sep 21, 2023
diff --git a/...ke/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DeltaProviderImpl.scala b/...ke/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/DeltaProviderImpl.scala
@@ -16,14 +16,15 @@
 
 package com.nvidia.spark.rapids.delta
 
-import com.databricks.sql.transaction.tahoe.DeltaLog
+import com.databricks.sql.transaction.tahoe.{DeltaLog, DeltaParquetFileFormat}
 import com.databricks.sql.transaction.tahoe.commands.{DeleteCommand, DeleteCommandEdge, MergeIntoCommand, MergeIntoCommandEdge, UpdateCommand, UpdateCommandEdge}
 import com.databricks.sql.transaction.tahoe.sources.DeltaDataSource
 import com.nvidia.spark.rapids._
 
 import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.command.RunnableCommand
-import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand
+import org.apache.spark.sql.execution.datasources.{FileFormat, SaveIntoDataSourceCommand}
 import org.apache.spark.sql.rapids.ExternalSource
 import org.apache.spark.sql.sources.CreatableRelationProvider
 
@@ -72,6 +73,25 @@ object DeltaProviderImpl extends DeltaProviderImplBase {
           .disabledByDefault("Delta Lake update support is experimental")
     ).map(r => (r.getClassFor.asSubclass(classOf[RunnableCommand]), r)).toMap
   }
+
+  /**
+   * Reports whether `format` is exactly `DeltaParquetFileFormat`.
+   * The comparison is class equality, so subclasses of that format do not match.
+   */
+  override def isSupportedFormat(format: Class[_ <: FileFormat]): Boolean =
+    classOf[DeltaParquetFileFormat] == format
+
+  /**
+   * Tags a file source scan for GPU support.
+   *
+   * Scans whose file format class is exactly `DeltaParquetFileFormat` go through the
+   * generic Parquet read checks and then the Delta-specific checks; any other format
+   * class is marked as not able to run on the GPU.
+   */
+  override def tagSupportForGpuFileSourceScan(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
+    val format = meta.wrapped.relation.fileFormat
+    // Class equality rather than a type test: subclasses are not treated as supported.
+    val isDeltaParquet = format.getClass == classOf[DeltaParquetFileFormat]
+    if (!isDeltaParquet) {
+      meta.willNotWorkOnGpu(s"format ${format.getClass} is not supported")
+    } else {
+      GpuReadParquetFileFormat.tagSupport(meta)
+      GpuDeltaParquetFileFormat.tagSupportForGpuFileSourceScan(meta)
+    }
+  }
+
+  /**
+   * Casts `format` to `DeltaParquetFileFormat` and passes it to
+   * `GpuDeltaParquetFileFormat.convertToGpu`.
+   * The cast throws a ClassCastException if `format` is some other non-null type.
+   */
+  override def getReadFileFormat(format: FileFormat): FileFormat =
+    GpuDeltaParquetFileFormat.convertToGpu(format.asInstanceOf[DeltaParquetFileFormat])
 }
 
 class DeltaCreatableRelationProviderMeta(

diff --git a/...c/main/databricks/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormatBase.scala b/...c/main/databricks/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormatBase.scala
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.delta
+
+import com.databricks.sql.transaction.tahoe.{DeltaColumnMapping, DeltaColumnMappingMode, NoMapping}
+import com.nvidia.spark.rapids.{GpuMetric, GpuParquetMultiFilePartitionReaderFactory, GpuReadParquetFileFormat}
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.read.PartitionReaderFactory
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.rapids.GpuFileSourceScanExec
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.SerializableConfiguration
+
+/**
+ * Base for GPU Delta Parquet read formats: a [[GpuReadParquetFileFormat]] that passes
+ * every schema through [[prepareSchema]] before handing it to the Parquet readers.
+ */
+abstract class GpuDeltaParquetFileFormatBase extends GpuReadParquetFileFormat {
+  /** Column mapping mode supplied to `DeltaColumnMapping.createPhysicalSchema`. */
+  val columnMappingMode: DeltaColumnMappingMode
+  /** Reference schema supplied to `DeltaColumnMapping.createPhysicalSchema`. */
+  val referenceSchema: StructType
+
+  /**
+   * Converts `inputSchema` with `DeltaColumnMapping.createPhysicalSchema`, using this
+   * format's reference schema and column mapping mode.
+   */
+  def prepareSchema(inputSchema: StructType): StructType =
+    DeltaColumnMapping.createPhysicalSchema(inputSchema, referenceSchema, columnMappingMode)
+
+  /**
+   * Builds the multi-file Parquet reader factory for `fileScan`. The scan's data,
+   * required and read-partition schemas are each converted by [[prepareSchema]] first.
+   */
+  override def createMultiFileReaderFactory(
+      broadcastedConf: Broadcast[SerializableConfiguration],
+      pushedFilters: Array[Filter],
+      fileScan: GpuFileSourceScanExec): PartitionReaderFactory = {
+    val physicalDataSchema = prepareSchema(fileScan.relation.dataSchema)
+    val physicalRequiredSchema = prepareSchema(fileScan.requiredSchema)
+    val physicalPartitionSchema = prepareSchema(fileScan.readPartitionSchema)
+    GpuParquetMultiFilePartitionReaderFactory(
+      fileScan.conf,
+      broadcastedConf,
+      physicalDataSchema,
+      physicalRequiredSchema,
+      physicalPartitionSchema,
+      pushedFilters,
+      fileScan.rapidsConf,
+      fileScan.allMetrics,
+      fileScan.queryUsesInputFile,
+      fileScan.alluxioPathsMap)
+  }
+
+  /**
+   * Delegates to the parent reader builder after converting the data, partition and
+   * required schemas with [[prepareSchema]]; all other arguments pass through unchanged.
+   */
+  override def buildReaderWithPartitionValuesAndMetrics(
+      sparkSession: SparkSession,
+      dataSchema: StructType,
+      partitionSchema: StructType,
+      requiredSchema: StructType,
+      filters: Seq[Filter],
+      options: Map[String, String],
+      hadoopConf: Configuration,
+      metrics: Map[String, GpuMetric],
+      alluxioPathReplacementMap: Option[Map[String, String]])
+  : PartitionedFile => Iterator[InternalRow] = {
+    val physicalDataSchema = prepareSchema(dataSchema)
+    val physicalPartitionSchema = prepareSchema(partitionSchema)
+    val physicalRequiredSchema = prepareSchema(requiredSchema)
+    super.buildReaderWithPartitionValuesAndMetrics(
+      sparkSession,
+      physicalDataSchema,
+      physicalPartitionSchema,
+      physicalRequiredSchema,
+      filters,
+      options,
+      hadoopConf,
+      metrics,
+      alluxioPathReplacementMap)
+  }
+
+  /**
+   * Accepts every field name when a column mapping mode other than `NoMapping` is set;
+   * otherwise defers to the parent's field name check.
+   */
+  override def supportFieldName(name: String): Boolean =
+    columnMappingMode != NoMapping || super.supportFieldName(name)
+}
diff --git a/...ake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/RapidsDeltaUtils.scala b/...ake/common/src/main/databricks/scala/com/nvidia/spark/rapids/delta/RapidsDeltaUtils.scala
@@ -33,11 +33,11 @@ object RapidsDeltaUtils {
       options: Map[String, String],
       spark: SparkSession): Unit = {
     FileFormatChecks.tag(meta, schema, DeltaFormatType, WriteFileOp)
-    DeltaLogShim.fileFormat(deltaLog) match {
-      case _: DeltaParquetFileFormat =>
-        GpuParquetFileFormat.tagGpuSupport(meta, spark, options, schema)
-      case f =>
-        meta.willNotWorkOnGpu(s"file format $f is not supported")
+    val format = DeltaLogShim.fileFormat(deltaLog)
+    if (format.getClass == classOf[DeltaParquetFileFormat]) {
+      GpuParquetFileFormat.tagGpuSupport(meta, spark, options, schema)
+    } else {
+      meta.willNotWorkOnGpu(s"file format $format is not supported")
     }
     checkIncompatibleConfs(meta, schema, deltaLog, spark.sessionState.conf, options)
   }

diff --git a/...a-lake/common/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/DeltaIOProvider.scala b/...a-lake/common/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/DeltaIOProvider.scala
@@ -21,10 +21,10 @@ import scala.util.Try
 import com.nvidia.spark.rapids._
 
 import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.delta.DeltaLog
+import org.apache.spark.sql.delta.{DeltaLog, DeltaParquetFileFormat}
 import org.apache.spark.sql.delta.rapids.DeltaRuntimeShim
 import org.apache.spark.sql.delta.sources.DeltaDataSource
-import org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand
+import org.apache.spark.sql.execution.datasources.{FileFormat, SaveIntoDataSourceCommand}
 import org.apache.spark.sql.rapids.ExternalSource
 import org.apache.spark.sql.rapids.execution.UnshimmedTrampolineUtil
 import org.apache.spark.sql.sources.CreatableRelationProvider
@@ -44,6 +44,10 @@ abstract class DeltaIOProvider extends DeltaProviderImplBase {
         })
     ).map(r => (r.getClassFor.asSubclass(classOf[CreatableRelationProvider]), r)).toMap
   }
+
+  /**
+   * Reports whether `format` is exactly `DeltaParquetFileFormat`.
+   * The comparison is class equality, so subclasses of that format do not match.
+   */
+  override def isSupportedFormat(format: Class[_ <: FileFormat]): Boolean =
+    classOf[DeltaParquetFileFormat] == format
 }
 
 class DeltaCreatableRelationProviderMeta(

diff --git a/...mon/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala b/...mon/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/GpuDeltaParquetFileFormat.scala
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.delta
+
+import com.nvidia.spark.rapids.{GpuMetric, GpuParquetMultiFilePartitionReaderFactory, GpuReadParquetFileFormat}
+import org.apache.hadoop.conf.Configuration
+
+import org.apache.spark.broadcast.Broadcast
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.connector.read.PartitionReaderFactory
+import org.apache.spark.sql.delta.{DeltaColumnMapping, DeltaColumnMappingMode, NoMapping}
+import org.apache.spark.sql.execution.datasources.PartitionedFile
+import org.apache.spark.sql.rapids.GpuFileSourceScanExec
+import org.apache.spark.sql.sources.Filter
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.SerializableConfiguration
+
+/**
+ * GPU Delta Parquet read format: a [[GpuReadParquetFileFormat]] that passes every
+ * schema through [[prepareSchema]] before handing it to the Parquet readers.
+ */
+trait GpuDeltaParquetFileFormat extends GpuReadParquetFileFormat {
+  /** Column mapping mode supplied to `DeltaColumnMapping.createPhysicalSchema`. */
+  val columnMappingMode: DeltaColumnMappingMode
+  /** Reference schema supplied to `DeltaColumnMapping.createPhysicalSchema`. */
+  val referenceSchema: StructType
+
+  /**
+   * Converts `inputSchema` with `DeltaColumnMapping.createPhysicalSchema`, using this
+   * format's reference schema and column mapping mode.
+   */
+  def prepareSchema(inputSchema: StructType): StructType =
+    DeltaColumnMapping.createPhysicalSchema(inputSchema, referenceSchema, columnMappingMode)
+
+  /**
+   * Builds the multi-file Parquet reader factory for `fileScan`. The scan's data,
+   * required and read-partition schemas are each converted by [[prepareSchema]] first.
+   */
+  override def createMultiFileReaderFactory(
+      broadcastedConf: Broadcast[SerializableConfiguration],
+      pushedFilters: Array[Filter],
+      fileScan: GpuFileSourceScanExec): PartitionReaderFactory = {
+    val physicalDataSchema = prepareSchema(fileScan.relation.dataSchema)
+    val physicalRequiredSchema = prepareSchema(fileScan.requiredSchema)
+    val physicalPartitionSchema = prepareSchema(fileScan.readPartitionSchema)
+    GpuParquetMultiFilePartitionReaderFactory(
+      fileScan.conf,
+      broadcastedConf,
+      physicalDataSchema,
+      physicalRequiredSchema,
+      physicalPartitionSchema,
+      pushedFilters,
+      fileScan.rapidsConf,
+      fileScan.allMetrics,
+      fileScan.queryUsesInputFile,
+      fileScan.alluxioPathsMap)
+  }
+
+  /**
+   * Delegates to the parent reader builder after converting the data, partition and
+   * required schemas with [[prepareSchema]]; all other arguments pass through unchanged.
+   */
+  override def buildReaderWithPartitionValuesAndMetrics(
+      sparkSession: SparkSession,
+      dataSchema: StructType,
+      partitionSchema: StructType,
+      requiredSchema: StructType,
+      filters: Seq[Filter],
+      options: Map[String, String],
+      hadoopConf: Configuration,
+      metrics: Map[String, GpuMetric],
+      alluxioPathReplacementMap: Option[Map[String, String]])
+  : PartitionedFile => Iterator[InternalRow] = {
+    val physicalDataSchema = prepareSchema(dataSchema)
+    val physicalPartitionSchema = prepareSchema(partitionSchema)
+    val physicalRequiredSchema = prepareSchema(requiredSchema)
+    super.buildReaderWithPartitionValuesAndMetrics(
+      sparkSession,
+      physicalDataSchema,
+      physicalPartitionSchema,
+      physicalRequiredSchema,
+      filters,
+      options,
+      hadoopConf,
+      metrics,
+      alluxioPathReplacementMap)
+  }
+
+  /**
+   * Accepts every field name when a column mapping mode other than `NoMapping` is set;
+   * otherwise defers to the parent's field name check.
+   */
+  override def supportFieldName(name: String): Boolean =
+    columnMappingMode != NoMapping || super.supportFieldName(name)
+}
diff --git a/...-lake/common/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/RapidsDeltaUtils.scala b/...-lake/common/src/main/delta-io/scala/com/nvidia/spark/rapids/delta/RapidsDeltaUtils.scala
@@ -32,11 +32,11 @@ object RapidsDeltaUtils {
       options: Map[String, String],
       spark: SparkSession): Unit = {
     FileFormatChecks.tag(meta, schema, DeltaFormatType, WriteFileOp)
-    DeltaRuntimeShim.fileFormatFromLog(deltaLog) match {
-      case _: DeltaParquetFileFormat =>
-        GpuParquetFileFormat.tagGpuSupport(meta, spark, options, schema)
-      case f =>
-        meta.willNotWorkOnGpu(s"file format $f is not supported")
+    val format = DeltaRuntimeShim.fileFormatFromLog(deltaLog)
+    if (format.getClass == classOf[DeltaParquetFileFormat]) {
+      GpuParquetFileFormat.tagGpuSupport(meta, spark, options, schema)
+    } else {
+      meta.willNotWorkOnGpu(s"file format $format is not supported")
     }
     checkIncompatibleConfs(meta, deltaLog, spark.sessionState.conf, options)
   }

diff --git a/...ke/delta-20x/src/main/scala/com/nvidia/spark/rapids/delta/delta20x/Delta20xProvider.scala b/...ke/delta-20x/src/main/scala/com/nvidia/spark/rapids/delta/delta20x/Delta20xProvider.scala
@@ -16,11 +16,14 @@
 
 package com.nvidia.spark.rapids.delta.delta20x
 
-import com.nvidia.spark.rapids.{GpuOverrides, RunnableCommandRule}
+import com.nvidia.spark.rapids.{GpuOverrides, GpuReadParquetFileFormat, RunnableCommandRule, SparkPlanMeta}
 import com.nvidia.spark.rapids.delta.DeltaIOProvider
 
+import org.apache.spark.sql.delta.DeltaParquetFileFormat
 import org.apache.spark.sql.delta.commands.{DeleteCommand, MergeIntoCommand, UpdateCommand}
+import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.command.RunnableCommand
+import org.apache.spark.sql.execution.datasources.FileFormat
 
 object Delta20xProvider extends DeltaIOProvider {
 
@@ -41,4 +44,18 @@ object Delta20xProvider extends DeltaIOProvider {
           .disabledByDefault("Delta Lake update support is experimental")
     ).map(r => (r.getClassFor.asSubclass(classOf[RunnableCommand]), r)).toMap
   }
+
+  /**
+   * Tags a file source scan for GPU support.
+   *
+   * Scans whose file format class is exactly `DeltaParquetFileFormat` go through the
+   * generic Parquet read checks; any other format class is marked as not able to run
+   * on the GPU.
+   */
+  override def tagSupportForGpuFileSourceScan(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
+    val format = meta.wrapped.relation.fileFormat
+    // Class equality rather than a type test: subclasses are not treated as supported.
+    val isDeltaParquet = format.getClass == classOf[DeltaParquetFileFormat]
+    if (!isDeltaParquet) {
+      meta.willNotWorkOnGpu(s"format ${format.getClass} is not supported")
+    } else {
+      GpuReadParquetFileFormat.tagSupport(meta)
+    }
+  }
+
+  /**
+   * Builds a `GpuDelta20xParquetFileFormat` from the column mapping mode and reference
+   * schema of the given CPU format.
+   * The cast throws a ClassCastException if `format` is some other non-null type.
+   */
+  override def getReadFileFormat(format: FileFormat): FileFormat = {
+    val deltaFormat = format.asInstanceOf[DeltaParquetFileFormat]
+    val mappingMode = deltaFormat.columnMappingMode
+    val schema = deltaFormat.referenceSchema
+    GpuDelta20xParquetFileFormat(mappingMode, schema)
+  }
 }
diff --git a/.../src/main/scala/com/nvidia/spark/rapids/delta/delta20x/GpuDelta20xParquetFileFormat.scala b/.../src/main/scala/com/nvidia/spark/rapids/delta/delta20x/GpuDelta20xParquetFileFormat.scala
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.nvidia.spark.rapids.delta.delta20x
+
+import com.nvidia.spark.rapids.delta.GpuDeltaParquetFileFormat
+
+import org.apache.spark.sql.delta.DeltaColumnMappingMode
+import org.apache.spark.sql.types.StructType
+
+/** Concrete [[GpuDeltaParquetFileFormat]] for the delta20x module; adds no members of its own. */
+case class GpuDelta20xParquetFileFormat(
+    override val columnMappingMode: DeltaColumnMappingMode,
+    override val referenceSchema: StructType)
+  extends GpuDeltaParquetFileFormat
diff --git a/.../src/main/scala/org/apache/spark/sql/delta/rapids/delta20x/GpuOptimisticTransaction.scala b/.../src/main/scala/org/apache/spark/sql/delta/rapids/delta20x/GpuOptimisticTransaction.scala
@@ -200,9 +200,11 @@ class GpuOptimisticTransaction
           }.toMap
       }
 
-      val gpuFileFormat = deltaLog.fileFormat(metadata) match {
-        case _: DeltaParquetFileFormat => new GpuParquetFileFormat
-        case f => throw new IllegalStateException(s"file format $f is not supported")
+      val deltaFileFormat = deltaLog.fileFormat(metadata)
+      val gpuFileFormat = if (deltaFileFormat.getClass == classOf[DeltaParquetFileFormat]) {
+        new GpuParquetFileFormat
+      } else {
+        throw new IllegalStateException(s"file format $deltaFileFormat is not supported")
       }
 
       try {

diff --git a/...ke/delta-21x/src/main/scala/com/nvidia/spark/rapids/delta/delta21x/Delta21xProvider.scala b/...ke/delta-21x/src/main/scala/com/nvidia/spark/rapids/delta/delta21x/Delta21xProvider.scala
@@ -16,11 +16,14 @@
 
 package com.nvidia.spark.rapids.delta.delta21x
 
-import com.nvidia.spark.rapids.{GpuOverrides, RunnableCommandRule}
+import com.nvidia.spark.rapids.{GpuOverrides, GpuReadParquetFileFormat, RunnableCommandRule, SparkPlanMeta}
 import com.nvidia.spark.rapids.delta.DeltaIOProvider
 
+import org.apache.spark.sql.delta.DeltaParquetFileFormat
 import org.apache.spark.sql.delta.commands.{DeleteCommand, MergeIntoCommand, UpdateCommand}
+import org.apache.spark.sql.execution.FileSourceScanExec
 import org.apache.spark.sql.execution.command.RunnableCommand
+import org.apache.spark.sql.execution.datasources.FileFormat
 
 object Delta21xProvider extends DeltaIOProvider {
 
@@ -41,4 +44,18 @@ object Delta21xProvider extends DeltaIOProvider {
           .disabledByDefault("Delta Lake update support is experimental")
     ).map(r => (r.getClassFor.asSubclass(classOf[RunnableCommand]), r)).toMap
   }
+
+  /**
+   * Tags a file source scan for GPU support.
+   *
+   * Scans whose file format class is exactly `DeltaParquetFileFormat` go through the
+   * generic Parquet read checks; any other format class is marked as not able to run
+   * on the GPU.
+   */
+  override def tagSupportForGpuFileSourceScan(meta: SparkPlanMeta[FileSourceScanExec]): Unit = {
+    val format = meta.wrapped.relation.fileFormat
+    // Class equality rather than a type test: subclasses are not treated as supported.
+    val isDeltaParquet = format.getClass == classOf[DeltaParquetFileFormat]
+    if (!isDeltaParquet) {
+      meta.willNotWorkOnGpu(s"format ${format.getClass} is not supported")
+    } else {
+      GpuReadParquetFileFormat.tagSupport(meta)
+    }
+  }
+
+  /**
+   * Builds a `GpuDelta21xParquetFileFormat` from the column mapping mode and reference
+   * schema of the given CPU format.
+   * The cast throws a ClassCastException if `format` is some other non-null type.
+   */
+  override def getReadFileFormat(format: FileFormat): FileFormat = {
+    val deltaFormat = format.asInstanceOf[DeltaParquetFileFormat]
+    val mappingMode = deltaFormat.columnMappingMode
+    val schema = deltaFormat.referenceSchema
+    GpuDelta21xParquetFileFormat(mappingMode, schema)
+  }
 }