Skip to content

Commit

Permalink
[Spark] Fix char/varchar to string column conversions
Browse files Browse the repository at this point in the history
Users are currently allowed to change the data type of their columns
from char/varchar to string. Even though the command succeeds, the
column metadata that indicates that the column is char/varchar is not
removed during the process. As a result, Delta enforces the old
char/varchar constraints on future inserts.

This PR fixes that by removing the char/varchar metadata from any
column whose type is changed to string.

## How was this patch tested?

Added a new test which was failing without the code fix.

## Does this PR introduce _any_ user-facing changes?
Yes, `ALTER COLUMN X TYPE STRING;` will now work correctly when X is
char/varchar.
  • Loading branch information
dhruvarya-db authored Mar 14, 2024
1 parent 4456a12 commit 9a59c0a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute}
import org.apache.spark.sql.catalyst.catalog.CatalogUtils
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, QualifiedColType}
import org.apache.spark.sql.catalyst.util.CharVarcharUtils
import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, SparkCharVarcharUtils}
import org.apache.spark.sql.connector.catalog.TableCatalog
import org.apache.spark.sql.connector.catalog.TableChange.{After, ColumnPosition, First}
import org.apache.spark.sql.connector.expressions.FieldReference
Expand Down Expand Up @@ -578,12 +578,25 @@ case class AlterTableChangeColumnDeltaCommand(
case Some(newDefaultValue) => result.withCurrentDefaultValue(newDefaultValue)
case None => result.clearCurrentDefaultValue()
}
val updatedColumnMetadata =
if (!SparkCharVarcharUtils.hasCharVarchar(newColumn.dataType)) {
// Remove the char/varchar property from the metadata that
// indicates that this column is a char/varchar column.
// We construct this throwaway object because
// CharVarcharUtils.cleanAttrMetadata takes an AttributeReference.
val throwAwayAttrRef = AttributeReference(
result.name, result.dataType, nullable = result.nullable, result.metadata)()
CharVarcharUtils.cleanAttrMetadata(throwAwayAttrRef).metadata
} else {
result.metadata
}
result
.copy(
name = newColumn.name,
dataType =
SchemaUtils.changeDataType(oldColumn.dataType, newColumn.dataType, resolver),
nullable = newColumn.nullable)
nullable = newColumn.nullable,
metadata = updatedColumnMetadata)
}

// Replace existing field with new field
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1628,6 +1628,17 @@ trait DeltaAlterTableTests extends DeltaAlterTableTestBase {
checkColType(spark.table("t").schema(1), VarcharType(5))
}
}

test("CHANGE COLUMN: allow change from char(x) to string type") {
withTable("t") {
sql("CREATE TABLE t(i VARCHAR(4)) USING delta")
sql("ALTER TABLE t CHANGE COLUMN i TYPE STRING")
val col = spark.table("t").schema.head
assert(col.dataType == StringType)
assert(CharVarcharUtils.getRawType(col.metadata).isEmpty)
sql("INSERT INTO t VALUES ('123456789')")
}
}
}

trait DeltaAlterTableByNameTests extends DeltaAlterTableTests {
Expand Down

0 comments on commit 9a59c0a

Please sign in to comment.