Type Widening in ALTER TABLE CHANGE COLUMN #2645 (Closed)
spark/src/main/scala/org/apache/spark/sql/delta/DeltaConfig.scala (11 additions, 0 deletions)

@@ -725,6 +725,17 @@ trait DeltaConfigsBase extends DeltaLogging {
"needs to be a boolean."
)

  /**
   * Whether widening the type of an existing column or field is allowed, either manually using
   * ALTER TABLE CHANGE COLUMN or automatically if automatic schema evolution is enabled.
   */
  val ENABLE_TYPE_WIDENING = buildConfig[Boolean](
    key = "enableTypeWidening",
    defaultValue = false.toString,
    fromString = _.toBoolean,
    validationFunction = _ => true,
    helpMessage = "needs to be a boolean.")

  val MANAGED_COMMIT_OWNER_NAME = buildConfig[Option[String]](
    "managedCommits.commitOwner-dev",
    null,
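For context, a minimal usage sketch of the new property (assumptions: `buildConfig` applies the standard `delta.` prefix to the key, so the user-facing property is `delta.enableTypeWidening`; the table name is illustrative):

// Sketch only: assumes the "delta." key prefix added by buildConfig and an
// existing Delta table named my_table.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("type-widening-config").getOrCreate()

// Opt the table into type widening via its table properties.
spark.sql("ALTER TABLE my_table SET TBLPROPERTIES ('delta.enableTypeWidening' = 'true')")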
spark/src/main/scala/org/apache/spark/sql/delta/TableFeature.scala

@@ -355,7 +355,8 @@ object TableFeature {
        // managed-commits are under development and only available in testing.
        ManagedCommitTableFeature,
        // Row IDs are still under development and only available in testing.
        RowTrackingFeature,
        TypeWideningTableFeature)
    }
    val featureMap = features.map(f => f.name.toLowerCase(Locale.ROOT) -> f).toMap
    require(features.size == featureMap.size, "Lowercase feature names must not duplicate.")
@@ -625,6 +626,18 @@ object ManagedCommitTableFeature
  }
}

object TypeWideningTableFeature extends ReaderWriterFeature(name = "typeWidening-dev")
    with FeatureAutomaticallyEnabledByMetadata {
  override def automaticallyUpdateProtocolOfExistingTables: Boolean = true

  private def isTypeWideningSupportNeededByMetadata(metadata: Metadata): Boolean =
    DeltaConfigs.ENABLE_TYPE_WIDENING.fromMetaData(metadata)

  override def metadataRequiresFeatureToBeEnabled(
      metadata: Metadata,
      spark: SparkSession): Boolean = isTypeWideningSupportNeededByMetadata(metadata)
}

Review comment (Contributor), on the feature declaration: Since we are using -dev right now and it's not ready for users, is it behind the isTesting flag?

/**
 * Features below are for testing only, and are being registered to the system only in the testing
 * environment. See [[TableFeature.allSupportedFeaturesMap]] for the registration.
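To illustrate the contract encoded by FeatureAutomaticallyEnabledByMetadata above: once the table property appears in the table metadata, metadataRequiresFeatureToBeEnabled returns true, and automaticallyUpdateProtocolOfExistingTables = true means existing tables get the required protocol upgrade automatically instead of failing the commit. A self-contained sketch with simplified stand-in types (the real Metadata and Protocol actions live in org.apache.spark.sql.delta.actions and carry far more state):

// Simplified model, not the real delta-spark types.
object TypeWideningEnablementSketch {
  // Stand-in for the table-property map carried by the Metadata action.
  final case class Metadata(configuration: Map[String, String])

  // Mirrors isTypeWideningSupportNeededByMetadata from the diff above.
  def isTypeWideningSupportNeededByMetadata(metadata: Metadata): Boolean =
    metadata.configuration.get("delta.enableTypeWidening").exists(_.toBoolean)

  def main(args: Array[String]): Unit = {
    val before = Metadata(Map.empty)
    val after = Metadata(Map("delta.enableTypeWidening" -> "true"))
    assert(!isTypeWideningSupportNeededByMetadata(before))
    // Setting the property makes the feature "required by metadata", which is
    // what triggers the automatic protocol upgrade on the next commit.
    assert(isTypeWideningSupportNeededByMetadata(after))
  }
}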
spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala (new file, 63 additions)

@@ -0,0 +1,63 @@
/*
 * Copyright (2021) The Delta Lake Project Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.actions.{Metadata, Protocol, TableFeatureProtocolUtils}

import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

object TypeWidening {

  /**
   * Returns whether the protocol version supports the Type Widening table feature.
   */
  def isSupported(protocol: Protocol): Boolean =
    protocol.isFeatureSupported(TypeWideningTableFeature)

  /**
   * Returns whether Type Widening is enabled on this table version. Checks that Type Widening is
   * supported, which is a prerequisite for enabling Type Widening, and throws an error if it is
   * not. When Type Widening is enabled, the type of existing columns or fields can be widened
   * using ALTER TABLE CHANGE COLUMN.
   */
  def isEnabled(protocol: Protocol, metadata: Metadata): Boolean = {
    val isEnabled = DeltaConfigs.ENABLE_TYPE_WIDENING.fromMetaData(metadata)
    if (isEnabled && !isSupported(protocol)) {
      throw new IllegalStateException(
        s"Table property '${DeltaConfigs.ENABLE_TYPE_WIDENING.key}' is " +
          s"set on the table but this table version doesn't support table feature " +
          s"'${TableFeatureProtocolUtils.propertyKey(TypeWideningTableFeature)}'.")
    }
    isEnabled
  }

  /**
   * Returns whether the given type change is eligible for widening. This only checks atomic types.
   * It is the responsibility of the caller to recurse into structs, maps and arrays.
   */
  def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean =
    (fromType, toType) match {
      case (from, to) if from == to => true
      // All supported type changes below are supposed to be widening, but to be safe, reject any
      // non-widening change upfront.
      case (from, to) if !Cast.canUpCast(from, to) => false
      case (ByteType, ShortType) => true
      case (ByteType | ShortType, IntegerType) => true
      case _ => false
    }
}

Review comment (Collaborator), on the IllegalStateException message in isEnabled: I guess this should be using the error framework?

Reply (Collaborator Author): This should never happen unless there's a bug in the implementation, so I wouldn't give it an error class. We typically wouldn't want to document that error as a user-facing error.
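To make the supported-change matrix in isTypeChangeSupported concrete, a small check (a sketch, assuming a delta-spark test classpath; the expected results follow directly from the match above):

import org.apache.spark.sql.delta.TypeWidening
import org.apache.spark.sql.types._

// Identity and the two whitelisted widenings are accepted.
assert(TypeWidening.isTypeChangeSupported(IntegerType, IntegerType))
assert(TypeWidening.isTypeChangeSupported(ByteType, ShortType))
assert(TypeWidening.isTypeChangeSupported(ByteType, IntegerType))
assert(TypeWidening.isTypeChangeSupported(ShortType, IntegerType))

// Narrowing changes fail the Cast.canUpCast guard; other upcasts such as
// int -> long are not (yet) in the allowed list, so they are rejected too.
assert(!TypeWidening.isTypeChangeSupported(IntegerType, ByteType))
assert(!TypeWidening.isTypeChangeSupported(IntegerType, LongType))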
spark/src/main/scala/org/apache/spark/sql/delta/commands/alterDeltaTableCommands.scala

@@ -706,7 +706,8 @@ case class AlterTableChangeColumnDeltaCommand(
        newType,
        resolver,
        txn.metadata.columnMappingMode,
        columnPath :+ originalField.name,
        allowTypeWidening = TypeWidening.isEnabled(txn.protocol, txn.metadata)
      ).nonEmpty) {
      throw DeltaErrors.alterTableChangeColumnException(
        fieldPath = UnresolvedAttribute(columnPath :+ originalField.name).name,
@@ -785,6 +786,7 @@ case class AlterTableReplaceColumnsDeltaCommand(
        changingSchema,
        resolver,
        txn.metadata.columnMappingMode,
        allowTypeWidening = TypeWidening.isEnabled(txn.protocol, txn.metadata),
        failOnAmbiguousChanges = true
      ).foreach { operation =>
      throw DeltaErrors.alterTableReplaceColumnsException(
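End to end, the two call-site changes above enable a DDL flow like the following (a sketch; table and column names are illustrative, and it assumes the table property from DeltaConfig.scala is set so TypeWidening.isEnabled returns true):

// Widen a byte column to int on a table that opted into type widening.
spark.sql(
  """CREATE TABLE demo (a BYTE) USING delta
    |TBLPROPERTIES ('delta.enableTypeWidening' = 'true')""".stripMargin)
spark.sql("ALTER TABLE demo CHANGE COLUMN a TYPE INT") // byte -> int: supported widening
// An unsupported change (e.g. int -> string, or any change without the table
// property) still throws DeltaErrors.alterTableChangeColumnException as before.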
spark/src/main/scala/org/apache/spark/sql/delta/schema/SchemaUtils.scala

@@ -21,7 +21,7 @@ import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.util.control.NonFatal

import org.apache.spark.sql.delta.{DeltaAnalysisException, DeltaColumnMappingMode, DeltaErrors, DeltaLog, GeneratedColumn, NoMapping, TypeWidening}
import org.apache.spark.sql.delta.actions.Protocol
import org.apache.spark.sql.delta.commands.cdc.CDCReader
import org.apache.spark.sql.delta.metering.DeltaLogging
@@ -911,6 +911,8 @@ def normalizeColumnNamesInDataType(
   * @param failOnAmbiguousChanges Throw an error if a StructField both has columns dropped and new
   *                               columns added. These are ambiguous changes, because we don't
   *                               know if a column needs to be renamed, dropped, or added.
   * @param allowTypeWidening Whether widening type changes as defined in [[TypeWidening]]
   *                          can be applied.
   * @return None if the data types can be changed, otherwise Some(err) containing the reason.
   */
  def canChangeDataType(
@@ -919,7 +921,8 @@
      resolver: Resolver,
      columnMappingMode: DeltaColumnMappingMode,
      columnPath: Seq[String] = Nil,
      failOnAmbiguousChanges: Boolean = false,
      allowTypeWidening: Boolean = false): Option[String] = {
    def verify(cond: Boolean, err: => String): Unit = {
      if (!cond) {
        throw DeltaErrors.cannotChangeDataType(err)
@@ -970,6 +973,11 @@
            (if (columnPath.nonEmpty) s" from $columnName" else ""))
        }

      case (fromDataType: AtomicType, toDataType: AtomicType) if allowTypeWidening =>
        verify(TypeWidening.isTypeChangeSupported(fromDataType, toDataType),
          s"changing data type of ${UnresolvedAttribute(columnPath).name} " +
            s"from $fromDataType to $toDataType")

      case (fromDataType, toDataType) =>
        verify(fromDataType == toDataType,
          s"changing data type of ${UnresolvedAttribute(columnPath).name} " +
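And at the level this file changes, a hedged sketch of the new flag in action (internal API; assumes the elided leading parameters of canChangeDataType are the from/to data types, and uses caseInsensitiveResolution plus NoMapping as stand-ins for whatever resolver and column-mapping mode real callers pass):

import org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution
import org.apache.spark.sql.delta.NoMapping
import org.apache.spark.sql.delta.schema.SchemaUtils
import org.apache.spark.sql.types._

val from = StructType(Seq(StructField("a", ByteType)))
val to = StructType(Seq(StructField("a", IntegerType)))

// Default behaviour is unchanged: without the flag, byte -> int is still
// reported as an illegal change (Some(err)).
assert(SchemaUtils.canChangeDataType(
  from, to, caseInsensitiveResolution, NoMapping).nonEmpty)

// With allowTypeWidening = true the same change is accepted (None).
assert(SchemaUtils.canChangeDataType(
  from, to, caseInsensitiveResolution, NoMapping, allowTypeWidening = true).isEmpty)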