[Spark] Reject unsupported type changes with Uniform #3947

Open · wants to merge 6 commits into base: master
Changes from 5 commits
@@ -0,0 +1,21 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.delta.uniform

import org.apache.spark.sql.delta.typewidening.TypeWideningUniformTests

class TypeWideningUniformSuite extends TypeWideningUniformTests with WriteDeltaHMSReadIceberg
6 changes: 6 additions & 0 deletions spark/src/main/resources/error/delta-error-classes.json
@@ -1221,6 +1221,12 @@
"<schema>"
]
},
"UNSUPPORTED_TYPE_WIDENING" : {
"message" : [
"IcebergCompatV<version> is incompatible with a type change applied to this table:",
"Field <fieldPath> was changed from <prevType> to <newType>."
]
},
"VERSION_MUTUAL_EXCLUSIVE" : {
"message" : [
"Only one IcebergCompat version can be enabled, please explicitly disable all other IcebergCompat versions that are not needed."
153 changes: 88 additions & 65 deletions spark/src/main/scala/org/apache/spark/sql/delta/DeltaAnalysis.scala

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions spark/src/main/scala/org/apache/spark/sql/delta/DeltaErrors.scala
@@ -3402,6 +3402,23 @@ trait DeltaErrorsBase
)
}

def icebergCompatUnsupportedTypeWideningException(
version: Int,
fieldPath: Seq[String],
oldType: DataType,
newType: DataType): Throwable = {
new DeltaUnsupportedOperationException(
errorClass = "DELTA_ICEBERG_COMPAT_VIOLATION.UNSUPPORTED_TYPE_WIDENING",
messageParameters = Array(
version.toString,
version.toString,
Comment on lines +3413 to +3414

Contributor:
double version?

Collaborator (Author):
That got me at first too: version is used twice in the error message: once in the main error DELTA_ICEBERG_COMPAT_VIOLATION and once in the sub-error class UNSUPPORTED_TYPE_WIDENING.

SchemaUtils.prettyFieldName(fieldPath),
toSQLType(oldType),
toSQLType(newType)
)
)
}
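
To make the review thread above concrete: the five messageParameters bind, in order, to <version> in the parent DELTA_ICEBERG_COMPAT_VIOLATION message, then to <version>, <fieldPath>, <prevType> and <newType> in the UNSUPPORTED_TYPE_WIDENING sub-class added above. Assuming a parent message along the lines of "The validation of IcebergCompatV<version> has failed" (its exact wording lives elsewhere in delta-error-classes.json), widening field col from INT to DOUBLE under IcebergCompatV2 would render roughly as:

[DELTA_ICEBERG_COMPAT_VIOLATION.UNSUPPORTED_TYPE_WIDENING] The validation of IcebergCompatV2 has failed: IcebergCompatV2 is incompatible with a type change applied to this table: Field col was changed from "INT" to "DOUBLE".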

def universalFormatConversionFailedException(
failedOnCommitVersion: Long,
format: String,
@@ -21,8 +21,10 @@ import org.apache.spark.sql.delta.commands.DeletionVectorUtils
import org.apache.spark.sql.delta.logging.DeltaLogKeys
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.spark.sql.delta.schema.SchemaUtils
import org.apache.spark.sql.delta.sources.DeltaSQLConf

import org.apache.spark.internal.MDC
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types._

/**
@@ -47,7 +49,8 @@ object IcebergCompatV1 extends IcebergCompat(
CheckAddFileHasStats,
CheckNoPartitionEvolution,
CheckNoListMapNullType,
CheckDeletionVectorDisabled
CheckDeletionVectorDisabled,
CheckTypeWideningSupported
)
)

@@ -62,7 +65,8 @@ object IcebergCompatV2 extends IcebergCompat(
CheckTypeInV2AllowList,
CheckPartitionDataTypeInV2AllowList,
CheckNoPartitionEvolution,
CheckDeletionVectorDisabled
CheckDeletionVectorDisabled,
CheckTypeWideningSupported
)
)

@@ -104,6 +108,7 @@ case class IcebergCompat(
* updates need to be applied, will return None.
*/
def enforceInvariantsAndDependencies(
spark: SparkSession,
prevSnapshot: Snapshot,
newestProtocol: Protocol,
newestMetadata: Metadata,
@@ -190,6 +195,7 @@

// Apply additional checks
val context = IcebergCompatContext(
spark,
prevSnapshot,
protocolResult.getOrElse(newestProtocol),
metadataResult.getOrElse(newestMetadata),
@@ -301,6 +307,7 @@ object RequireColumnMapping extends RequiredDeltaTableProperty(
}

case class IcebergCompatContext(
spark: SparkSession,
prevSnapshot: Snapshot,
newestProtocol: Protocol,
newestMetadata: Metadata,
@@ -457,3 +464,31 @@ object CheckDeletionVectorDisabled extends IcebergCompatCheck {
}
}
}

/**
* Checks that the table didn't go through any type changes that Iceberg doesn't support. See
* `TypeWidening.isTypeChangeSupportedByIceberg()` for supported type changes.
* Note that this check covers both:
* - When the table had an unsupported type change applied in the past and Uniform is being enabled.
* - When Uniform is enabled and a new, unsupported type change is being applied.
*/
object CheckTypeWideningSupported extends IcebergCompatCheck {
override def apply(context: IcebergCompatContext): Unit = {
val skipCheck = context.spark.sessionState.conf
.getConf(DeltaSQLConf.DELTA_TYPE_WIDENING_ALLOW_UNSUPPORTED_ICEBERG_TYPE_CHANGES)

if (skipCheck || !TypeWidening.isSupported(context.newestProtocol)) return

TypeWideningMetadata.getAllTypeChanges(context.newestMetadata.schema).foreach {
case (fieldPath, TypeChange(_, fromType: AtomicType, toType: AtomicType, _))
// We ignore type changes that are not generally supported with type widening to reduce the
// risk of this check misfiring. These are handled by `TypeWidening.assertTableReadable()`.
// The error here only captures type changes that are supported in Delta but not Iceberg.
if TypeWidening.isTypeChangeSupported(fromType, toType) &&
!TypeWidening.isTypeChangeSupportedByIceberg(fromType, toType) =>
throw DeltaErrors.icebergCompatUnsupportedTypeWideningException(
context.version, fieldPath, fromType, toType)
case _ => () // ignore
}
}
}
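
For illustration, a minimal repro of how this check surfaces to users; this is a sketch, not part of the diff, and it assumes INT -> DOUBLE is among the widenings Delta itself supports in this build (the session conf read above, DELTA_TYPE_WIDENING_ALLOW_UNSUPPORTED_ICEBERG_TYPE_CHANGES, is the escape hatch that skips the check):

// Table with type widening and Uniform (IcebergCompatV2) enabled.
spark.sql("""
  CREATE TABLE t (a INT, b INT) USING delta TBLPROPERTIES (
    'delta.enableTypeWidening' = 'true',
    'delta.enableIcebergCompatV2' = 'true',
    'delta.universalFormat.enabledFormats' = 'iceberg')""")

// INT -> BIGINT is an Iceberg-supported promotion: the commit succeeds.
spark.sql("ALTER TABLE t ALTER COLUMN a TYPE BIGINT")

// INT -> DOUBLE is a Delta-supported widening but not an Iceberg promotion:
// CheckTypeWideningSupported rejects the commit with
// DELTA_ICEBERG_COMPAT_VIOLATION.UNSUPPORTED_TYPE_WIDENING.
spark.sql("ALTER TABLE t ALTER COLUMN b TYPE DOUBLE")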
@@ -1970,6 +1970,7 @@ trait OptimisticTransactionImpl extends TransactionalWrite

val (protocolUpdate1, metadataUpdate1) =
UniversalFormat.enforceInvariantsAndDependencies(
spark,
// Note: if this txn has no protocol or metadata updates, then `prev` will equal `newest`.
snapshot,
newestProtocol = protocol, // Note: this will try to use `newProtocol`
@@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis._
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.types.{AtomicType, StructField, StructType}

/**
* Implements logic to resolve conditions and actions in MERGE clauses and handles schema evolution.
@@ -292,11 +292,13 @@ object ResolveDeltaMergeInto {
})

val migrationSchema = filterSchema(source.schema, Seq.empty)
val allowTypeWidening = target.exists {
case DeltaTable(fileIndex) =>
TypeWidening.isEnabled(fileIndex.protocol, fileIndex.metadata)
case _ => false
}

val typeWideningMode =
target.collectFirst {
case DeltaTable(index) if TypeWidening.isEnabled(index.protocol, index.metadata) =>
TypeWideningMode.TypeEvolution(
uniformIcebergCompatibleOnly = UniversalFormat.icebergEnabled(index.metadata))
}.getOrElse(TypeWideningMode.NoTypeWidening)

// The implicit conversions flag allows any type to be merged from source to target if Spark
// SQL considers the source type implicitly castable to the target. Normally, mergeSchemas
@@ -306,7 +308,7 @@
target.schema,
migrationSchema,
allowImplicitConversions = true,
allowTypeWidening = allowTypeWidening
typeWideningMode = typeWideningMode
)
} else {
target.schema
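
The net effect for MERGE, sketched under the assumption that schema auto-merge is enabled and the target is a Uniform table with type widening on (illustrative statements, not from the PR):

// Schema evolution in MERGE now only widens target types that Iceberg can also read.
spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")
spark.sql("""
  MERGE INTO target t
  USING source s ON t.key = s.key
  WHEN MATCHED THEN UPDATE SET *
  WHEN NOT MATCHED THEN INSERT *""")
// If s.value is BIGINT and t.value is INT, the target column widens to BIGINT.
// If s.value is DOUBLE and t.value is INT, TypeEvolution(uniformIcebergCompatibleOnly = true)
// rejects the widening, so the target keeps INT and source values are cast as before.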
33 changes: 30 additions & 3 deletions spark/src/main/scala/org/apache/spark/sql/delta/TypeWidening.scala
@@ -68,14 +68,41 @@ object TypeWidening {
* It is the responsibility of the caller to recurse into structs, maps and arrays.
*/
def isTypeChangeSupported(fromType: AtomicType, toType: AtomicType): Boolean =
TypeWideningShims.isTypeChangeSupported(fromType, toType)
TypeWideningShims.isTypeChangeSupported(fromType = fromType, toType = toType)

/**
* Returns whether the given type change can be applied during schema evolution. Only a
* subset of supported type changes are considered for schema evolution.
*/
def isTypeChangeSupportedForSchemaEvolution(fromType: AtomicType, toType: AtomicType): Boolean =
TypeWideningShims.isTypeChangeSupportedForSchemaEvolution(fromType, toType)
def isTypeChangeSupportedForSchemaEvolution(
fromType: AtomicType,
toType: AtomicType,
uniformIcebergCompatibleOnly: Boolean): Boolean =
TypeWideningShims.isTypeChangeSupportedForSchemaEvolution(
fromType = fromType,
toType = toType
) && (
!uniformIcebergCompatibleOnly ||
isTypeChangeSupportedByIceberg(fromType = fromType, toType = toType)
)

/**
* Returns whether the given type change is supported by Iceberg, and by extension can be read
* using Uniform. See https://iceberg.apache.org/spec/#schema-evolution.
* Note that these are type promotions supported by Iceberg V1 & V2 (both support the same type
* promotions). Iceberg V3 will add support for date -> timestamp_ntz and void -> any but Uniform
* doesn't currently support Iceberg V3.
*/
def isTypeChangeSupportedByIceberg(fromType: AtomicType, toType: AtomicType): Boolean =
(fromType, toType) match {
case (from, to) if from == to => true
case (from, to) if !isTypeChangeSupported(from, to) => false
case (from: IntegralType, to: IntegralType) => from.defaultSize <= to.defaultSize
case (FloatType, DoubleType) => true
case (from: DecimalType, to: DecimalType)
if from.scale == to.scale && from.precision <= to.precision => true
case _ => false
}
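
A few concrete inputs and the results the new predicate should produce (a sketch; the last line assumes DATE -> TIMESTAMP_NTZ is otherwise a supported Delta type change in this build):

import org.apache.spark.sql.types._

TypeWidening.isTypeChangeSupportedByIceberg(IntegerType, LongType)                  // true: integral widening
TypeWidening.isTypeChangeSupportedByIceberg(FloatType, DoubleType)                  // true
TypeWidening.isTypeChangeSupportedByIceberg(DecimalType(10, 2), DecimalType(12, 2)) // true: same scale, higher precision
TypeWidening.isTypeChangeSupportedByIceberg(DecimalType(10, 2), DecimalType(12, 4)) // false: scale changes
TypeWidening.isTypeChangeSupportedByIceberg(IntegerType, DoubleType)                // false: not an Iceberg V1/V2 promotion
TypeWidening.isTypeChangeSupportedByIceberg(DateType, TimestampNTZType)             // false until Iceberg V3 is supported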

/**
* Asserts that the given table doesn't contain any unsupported type changes. This should never
@@ -0,0 +1,52 @@
/*
* Copyright (2021) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.delta

import org.apache.spark.sql.types.AtomicType

/**
* A type widening mode captures a specific set of type changes that are allowed to be applied.
* Currently:
* - NoTypeWidening: No type change is allowed.
* - TypeEvolution(uniformIcebergCompatibleOnly = true): Type changes that are eligible to be
* applied automatically during schema evolution and that are supported by Iceberg are allowed.
* - TypeEvolution(uniformIcebergCompatibleOnly = false): Type changes that are eligible to be
* applied automatically during schema evolution are allowed, even if they are not supported by
* Iceberg.
*/
sealed trait TypeWideningMode {
def shouldWidenType(fromType: AtomicType, toType: AtomicType): Boolean
}

object TypeWideningMode {
/**
* No type change allowed. Typically because type widening and/or schema evolution isn't enabled.
*/
case object NoTypeWidening extends TypeWideningMode {
override def shouldWidenType(fromType: AtomicType, toType: AtomicType): Boolean = false
}

/**
* Type changes that are eligible to be applied automatically during schema evolution are allowed.
* Can be restricted to only type changes supported by Iceberg.
*/
case class TypeEvolution(uniformIcebergCompatibleOnly: Boolean) extends TypeWideningMode {
override def shouldWidenType(fromType: AtomicType, toType: AtomicType): Boolean =
TypeWidening.isTypeChangeSupportedForSchemaEvolution(
fromType = fromType, toType = toType, uniformIcebergCompatibleOnly)
}
}
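
A quick illustration of the three behaviors (assuming INT -> DOUBLE is eligible for Delta schema evolution in this build but, per the Iceberg spec, is not an Iceberg promotion):

import org.apache.spark.sql.types._
import org.apache.spark.sql.delta.TypeWideningMode._

val strict  = TypeEvolution(uniformIcebergCompatibleOnly = true)
val lenient = TypeEvolution(uniformIcebergCompatibleOnly = false)

NoTypeWidening.shouldWidenType(IntegerType, LongType)  // false: no widening allowed
lenient.shouldWidenType(IntegerType, DoubleType)       // true: Delta-only widening accepted
strict.shouldWidenType(IntegerType, DoubleType)        // false: unreadable through Iceberg
strict.shouldWidenType(IntegerType, LongType)          // true: supported by both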
@@ -109,14 +109,15 @@ object UniversalFormat extends DeltaLogging {
* updates need to be applied, will return None.
*/
def enforceInvariantsAndDependencies(
spark: SparkSession,
snapshot: Snapshot,
newestProtocol: Protocol,
newestMetadata: Metadata,
operation: Option[DeltaOperations.Operation],
actions: Seq[Action]): (Option[Protocol], Option[Metadata]) = {
enforceHudiDependencies(newestMetadata, snapshot)
enforceIcebergInvariantsAndDependencies(
snapshot, newestProtocol, newestMetadata, operation, actions)
spark, snapshot, newestProtocol, newestMetadata, operation, actions)
}

/**
@@ -151,6 +152,7 @@
* updates need to be applied, will return None.
*/
def enforceIcebergInvariantsAndDependencies(
spark: SparkSession,
snapshot: Snapshot,
newestProtocol: Protocol,
newestMetadata: Metadata,
@@ -200,6 +202,7 @@
changed = uniformProtocol.nonEmpty || uniformMetadata.nonEmpty

val (v1protocolUpdate, v1metadataUpdate) = IcebergCompatV1.enforceInvariantsAndDependencies(
spark,
snapshot,
newestProtocol = protocolToCheck,
newestMetadata = metadataToCheck,
@@ -211,6 +214,7 @@
changed ||= v1protocolUpdate.nonEmpty || v1metadataUpdate.nonEmpty

val (v2protocolUpdate, v2metadataUpdate) = IcebergCompatV2.enforceInvariantsAndDependencies(
spark,
snapshot,
newestProtocol = protocolToCheck,
newestMetadata = metadataToCheck,
@@ -238,12 +242,14 @@
* otherwise the original configuration.
*/
def enforceDependenciesInConfiguration(
spark: SparkSession,
configuration: Map[String, String],
snapshot: Snapshot): Map[String, String] = {
var metadata = snapshot.metadata.copy(configuration = configuration)

// Check UniversalFormat related property dependencies
val (_, universalMetadata) = UniversalFormat.enforceInvariantsAndDependencies(
spark,
snapshot,
newestProtocol = snapshot.protocol,
newestMetadata = metadata,