Skip to content

Commit 8cd9e09

Browse files
committed
address pr comments
1 parent d0f3ebc commit 8cd9e09

File tree

2 files changed

+23
-20
lines changed

2 files changed

+23
-20
lines changed

kernel/src/schema/schema_compare.rs kernel/src/schema/compare.rs

+22-19
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
//! StructField::new("value", DataType::STRING, true),
1616
//! StructField::new("year", DataType::INTEGER, true),
1717
//! ]);
18-
//! // Schemas are compatible since the `year` value is nullable
18+
//! // Schemas are compatible since the `read_schema` adds a nullable column `year`
1919
//! assert!(schema.can_read_as(&read_schema).is_ok());
2020
//! ````
2121
//!
@@ -26,6 +26,12 @@ use crate::utils::require;
2626

2727
use super::{DataType, StructField, StructType};
2828

29+
/// The nullability flag of a schema's field. This can be compared with a read schema field's
30+
/// nullability flag using [`Nullable::can_read_as`].
31+
#[allow(unused)]
32+
#[derive(Clone, Copy)]
33+
pub(crate) struct Nullable(bool);
34+
2935
/// Represents the ways a schema comparison can fail.
3036
#[derive(Debug, thiserror::Error)]
3137
pub(crate) enum Error {
@@ -49,20 +55,17 @@ pub(crate) type SchemaComparisonResult = Result<(), Error>;
4955

5056
/// Represents a schema compatibility check for the type. If `self` can be read as `read_type`,
5157
/// this function returns `Ok(())`. Otherwise, this function returns `Err`.
58+
///
59+
/// TODO (Oussama): Remove the `allow(unsued)` once this is used in CDF.
5260
#[allow(unused)]
5361
pub(crate) trait SchemaComparison {
5462
fn can_read_as(&self, read_type: &Self) -> SchemaComparisonResult;
5563
}
5664

57-
/// The nullability flag of a schema's field. This can be compared with a read schema field's
58-
/// nullability flag using [`NullabilityFlag::can_read_as`].
59-
#[allow(unused)]
60-
pub(crate) struct NullabilityFlag(bool);
61-
62-
impl SchemaComparison for NullabilityFlag {
65+
impl SchemaComparison for Nullable {
6366
/// Represents a nullability comparison between two schemas' fields. Returns true if the
6467
/// read nullability is the same or wider than the nullability of self.
65-
fn can_read_as(&self, read_nullable: &NullabilityFlag) -> SchemaComparisonResult {
68+
fn can_read_as(&self, read_nullable: &Nullable) -> SchemaComparisonResult {
6669
// The case to avoid is when the column is nullable, but the read schema specifies the
6770
// column as non-nullable. So we avoid the case where !read_nullable && nullable
6871
// Hence we check that !(!read_nullable && existing_nullable)
@@ -79,7 +82,7 @@ impl SchemaComparison for StructField {
7982
/// 2. The both this field and `read_field` must have the same name.
8083
/// 3. You can read this data type as the `read_field`'s data type.
8184
fn can_read_as(&self, read_field: &Self) -> SchemaComparisonResult {
82-
NullabilityFlag(self.nullable).can_read_as(&NullabilityFlag(read_field.nullable))?;
85+
Nullable(self.nullable).can_read_as(&Nullable(read_field.nullable))?;
8386
require!(self.name() == read_field.name(), Error::FieldNameMismatch);
8487
self.data_type().can_read_as(read_field.data_type())?;
8588
Ok(())
@@ -113,18 +116,18 @@ impl SchemaComparison for StructType {
113116
);
114117

115118
// Check that the field names are a subset of the read fields.
116-
if !lowercase_field_map
119+
if lowercase_field_map
117120
.keys()
118-
.all(|name| lowercase_read_field_names.contains(name))
121+
.any(|name| !lowercase_read_field_names.contains(name))
119122
{
120123
return Err(Error::MissingColumn);
121124
}
122125
for read_field in read_type.fields() {
123126
match lowercase_field_map.get(&read_field.name().to_lowercase()) {
124127
Some(existing_field) => existing_field.can_read_as(read_field)?,
125128
None => {
126-
// Note: Delta spark does not perform the following check. Hence it ignores fields
127-
// that exist in the read schema that aren't in this schema.
129+
// Note: Delta spark does not perform the following check. Hence it ignores
130+
// non-null fields that exist in the read schema that aren't in this schema.
128131
require!(read_field.is_nullable(), Error::NewNonNullableColumn);
129132
}
130133
}
@@ -139,14 +142,14 @@ impl SchemaComparison for DataType {
139142
/// compatible data types with type widening. See issue [`#623`]
140143
/// 2. For complex data types, the nested types must be compatible as defined by [`SchemaComparison`]
141144
/// 3. For array data types, the nullability may not be tightened in the `read_type`. See
142-
/// [`NullabilityFlag::can_read_as`]
145+
/// [`Nullable::can_read_as`]
143146
///
144147
/// [`#623`]: <https://github.com/delta-io/delta-kernel-rs/issues/623>
145148
fn can_read_as(&self, read_type: &Self) -> SchemaComparisonResult {
146149
match (self, read_type) {
147150
(Self::Array(self_array), Self::Array(read_array)) => {
148-
NullabilityFlag(self_array.contains_null())
149-
.can_read_as(&NullabilityFlag(read_array.contains_null()))?;
151+
Nullable(self_array.contains_null())
152+
.can_read_as(&Nullable(read_array.contains_null()))?;
150153
self_array
151154
.element_type()
152155
.can_read_as(read_array.element_type())?;
@@ -155,8 +158,8 @@ impl SchemaComparison for DataType {
155158
self_struct.can_read_as(read_struct)?
156159
}
157160
(Self::Map(self_map), Self::Map(read_map)) => {
158-
NullabilityFlag(self_map.value_contains_null())
159-
.can_read_as(&NullabilityFlag(read_map.value_contains_null()))?;
161+
Nullable(self_map.value_contains_null())
162+
.can_read_as(&Nullable(read_map.value_contains_null()))?;
160163
self_map.key_type().can_read_as(read_map.key_type())?;
161164
self_map.value_type().can_read_as(read_map.value_type())?;
162165
}
@@ -172,7 +175,7 @@ impl SchemaComparison for DataType {
172175

173176
#[cfg(test)]
174177
mod tests {
175-
use crate::schema::schema_compare::{Error, SchemaComparison};
178+
use crate::schema::compare::{Error, SchemaComparison};
176179
use crate::schema::{ArrayType, DataType, MapType, StructField, StructType};
177180

178181
#[test]

kernel/src/schema/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub(crate) use crate::expressions::{column_name, ColumnName};
1414
use crate::utils::require;
1515
use crate::{DeltaResult, Error};
1616

17-
pub(crate) mod schema_compare;
17+
pub(crate) mod compare;
1818

1919
pub type Schema = StructType;
2020
pub type SchemaRef = Arc<StructType>;

0 commit comments

Comments
 (0)