Skip to content

Commit

Permalink
fix: disallow new generated columns
Browse files Browse the repository at this point in the history
Signed-off-by: Ion Koutsouris <[email protected]>
  • Loading branch information
ion-elgreco authored and rtyler committed Jan 15, 2025
1 parent 11b0893 commit b3010fc
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
13 changes: 12 additions & 1 deletion crates/core/src/operations/add_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use itertools::Itertools;

use super::transaction::{CommitBuilder, CommitProperties, PROTOCOL};
use super::{CustomExecuteHandler, Operation};
use crate::kernel::StructField;
use crate::kernel::{StructField, StructTypeExt};
use crate::logstore::LogStoreRef;
use crate::operations::cast::merge_schema::merge_delta_struct;
use crate::protocol::DeltaOperation;
Expand Down Expand Up @@ -85,6 +85,17 @@ impl std::future::IntoFuture for AddColumnBuilder {
this.pre_execute(operation_id).await?;

let fields_right = &StructType::new(fields.clone());

if !fields_right
.get_generated_columns()
.unwrap_or_default()
.is_empty()
{
return Err(DeltaTableError::Generic(
"New columns cannot be a generated column".to_string(),
));
}

let table_schema = this.snapshot.schema();
let new_table_schema = merge_delta_struct(table_schema, fields_right)?;

Expand Down
26 changes: 24 additions & 2 deletions crates/core/src/operations/cast/merge_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use arrow_schema::{
ArrowError, DataType, Field as ArrowField, Fields, Schema as ArrowSchema,
SchemaRef as ArrowSchemaRef,
};
use delta_kernel::schema::ColumnMetadataKey;

use crate::kernel::{ArrayType, DataType as DeltaDataType, MapType, StructField, StructType};

Expand All @@ -23,7 +24,16 @@ fn try_merge_metadata<T: std::cmp::PartialEq + Clone>(
)));
}
} else {
left.insert(k.clone(), v.clone());
// I'm not sure if updating the schema metadata is even valid?
if k != ColumnMetadataKey::GenerationExpression.as_ref() {
// At least new generated expression may not be insert into existing column metadata!
left.insert(k.clone(), v.clone());
} else {
return Err(ArrowError::SchemaError(format!(
"Cannot add generated expressions to exists columns {}",
k
)));
}
}
}
Ok(())
Expand Down Expand Up @@ -322,6 +332,10 @@ fn merge_arrow_vec_fields(
Err(e)
}
Ok(mut f) => {
// UNDO the implicit schema merging of batch fields into table fields that is done by
// field.try_merge
f.set_metadata(right_field.metadata().clone());

let mut field_matadata = f.metadata().clone();
try_merge_metadata(&mut field_matadata, right_field.metadata())?;
f.set_metadata(field_matadata);
Expand All @@ -338,7 +352,15 @@ fn merge_arrow_vec_fields(
if preserve_new_fields {
for field in batch_fields.into_iter() {
if table_fields.find(field.name()).is_none() {
fields.push(field.as_ref().clone());
if !field
.metadata()
.contains_key(ColumnMetadataKey::GenerationExpression.as_ref())
{
fields.push(field.as_ref().clone());
} else {
errors.push("Schema evolved fields cannot have generated expressions. Recreate the table to achieve this.".to_string());
return Err(ArrowError::SchemaError(errors.join("\n")));
}
}
}
}
Expand Down
1 change: 0 additions & 1 deletion crates/core/src/operations/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ use crate::protocol::{DeltaOperation, SaveMode};
use crate::table::builder::ensure_table_uri;
use crate::table::config::TableProperty;
use crate::{DeltaTable, DeltaTableBuilder};
use delta_kernel::table_features::{ReaderFeatures, WriterFeatures};

#[derive(thiserror::Error, Debug)]
enum CreateError {
Expand Down

0 comments on commit b3010fc

Please sign in to comment.