From b3010fc07d7e9355436283c5d302ce97cbde77e1 Mon Sep 17 00:00:00 2001 From: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> Date: Mon, 13 Jan 2025 09:35:21 +0100 Subject: [PATCH] fix: disallow new generated columns Signed-off-by: Ion Koutsouris <15728914+ion-elgreco@users.noreply.github.com> --- crates/core/src/operations/add_column.rs | 13 +++++++++- .../core/src/operations/cast/merge_schema.rs | 26 +++++++++++++++++-- crates/core/src/operations/create.rs | 1 - 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/crates/core/src/operations/add_column.rs b/crates/core/src/operations/add_column.rs index a3477405af..9da5b86111 100644 --- a/crates/core/src/operations/add_column.rs +++ b/crates/core/src/operations/add_column.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL}; use super::{CustomExecuteHandler, Operation}; -use crate::kernel::StructField; +use crate::kernel::{StructField, StructTypeExt}; use crate::logstore::LogStoreRef; use crate::operations::cast::merge_schema::merge_delta_struct; use crate::protocol::DeltaOperation; @@ -85,6 +85,17 @@ impl std::future::IntoFuture for AddColumnBuilder { this.pre_execute(operation_id).await?; let fields_right = &StructType::new(fields.clone()); + + if !fields_right + .get_generated_columns() + .unwrap_or_default() + .is_empty() + { + return Err(DeltaTableError::Generic( + "New columns cannot be a generated column".to_string(), + )); + } + let table_schema = this.snapshot.schema(); let new_table_schema = merge_delta_struct(table_schema, fields_right)?; diff --git a/crates/core/src/operations/cast/merge_schema.rs b/crates/core/src/operations/cast/merge_schema.rs index 64fe2b7ed6..b57c29b2e8 100644 --- a/crates/core/src/operations/cast/merge_schema.rs +++ b/crates/core/src/operations/cast/merge_schema.rs @@ -7,6 +7,7 @@ use arrow_schema::{ ArrowError, DataType, Field as ArrowField, Fields, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef, }; +use delta_kernel::schema::ColumnMetadataKey; use crate::kernel::{ArrayType, DataType as DeltaDataType, MapType, StructField, StructType}; @@ -23,7 +24,16 @@ fn try_merge_metadata( ))); } } else { - left.insert(k.clone(), v.clone()); + // I'm not sure if updating the schema metadata is even valid? + if k != ColumnMetadataKey::GenerationExpression.as_ref() { + // At least new generated expression may not be insert into existing column metadata! + left.insert(k.clone(), v.clone()); + } else { + return Err(ArrowError::SchemaError(format!( + "Cannot add generated expressions to exists columns {}", + k + ))); + } } } Ok(()) @@ -322,6 +332,10 @@ fn merge_arrow_vec_fields( Err(e) } Ok(mut f) => { + // UNDO the implicit schema merging of batch fields into table fields that is done by + // field.try_merge + f.set_metadata(right_field.metadata().clone()); + let mut field_matadata = f.metadata().clone(); try_merge_metadata(&mut field_matadata, right_field.metadata())?; f.set_metadata(field_matadata); @@ -338,7 +352,15 @@ fn merge_arrow_vec_fields( if preserve_new_fields { for field in batch_fields.into_iter() { if table_fields.find(field.name()).is_none() { - fields.push(field.as_ref().clone()); + if !field + .metadata() + .contains_key(ColumnMetadataKey::GenerationExpression.as_ref()) + { + fields.push(field.as_ref().clone()); + } else { + errors.push("Schema evolved fields cannot have generated expressions. Recreate the table to achieve this.".to_string()); + return Err(ArrowError::SchemaError(errors.join("\n"))); + } } } } diff --git a/crates/core/src/operations/create.rs b/crates/core/src/operations/create.rs index bcf79650cf..a4006c692c 100644 --- a/crates/core/src/operations/create.rs +++ b/crates/core/src/operations/create.rs @@ -20,7 +20,6 @@ use crate::protocol::{DeltaOperation, SaveMode}; use crate::table::builder::ensure_table_uri; use crate::table::config::TableProperty; use crate::{DeltaTable, DeltaTableBuilder}; -use delta_kernel::table_features::{ReaderFeatures, WriterFeatures}; #[derive(thiserror::Error, Debug)] enum CreateError {