Skip to content

Commit 9506c1b

Browse files
Merge branch 'main' into schema-compat
2 parents 8cd9e09 + d999b5c commit 9506c1b

File tree

18 files changed

+679
-154
lines changed

18 files changed

+679
-154
lines changed

ffi/examples/read-table/schema.h

+23-16
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ typedef struct
3434
{
3535
char* name;
3636
char* type;
37+
bool is_nullable;
3738
uintptr_t children;
3839
} SchemaItem;
3940

@@ -51,11 +52,12 @@ typedef struct
5152

5253
// lists are preallocated to have exactly enough space, so we just fill in the next open slot and
5354
// increment our length
54-
SchemaItem* add_to_list(SchemaItemList* list, char* name, char* type)
55+
SchemaItem* add_to_list(SchemaItemList* list, char* name, char* type, bool is_nullable)
5556
{
5657
int idx = list->len;
5758
list->list[idx].name = name;
5859
list->list[idx].type = type;
60+
list->list[idx].is_nullable = is_nullable;
5961
list->len++;
6062
return &list->list[idx];
6163
}
@@ -106,49 +108,53 @@ void visit_struct(
106108
void* data,
107109
uintptr_t sibling_list_id,
108110
struct KernelStringSlice name,
111+
bool is_nullable,
109112
uintptr_t child_list_id)
110113
{
111114
SchemaBuilder* builder = data;
112115
char* name_ptr = allocate_string(name);
113116
PRINT_CHILD_VISIT("struct", name_ptr, sibling_list_id, "Children", child_list_id);
114-
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct");
117+
SchemaItem* struct_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "struct", is_nullable);
115118
struct_item->children = child_list_id;
116119
}
120+
117121
void visit_array(
118122
void* data,
119123
uintptr_t sibling_list_id,
120124
struct KernelStringSlice name,
121-
bool contains_null,
125+
bool is_nullable,
122126
uintptr_t child_list_id)
123127
{
124128
SchemaBuilder* builder = data;
125-
char* name_ptr = malloc(sizeof(char) * (name.len + 24));
129+
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
126130
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
127-
snprintf(name_ptr + name.len, 24, " (contains null: %s)", contains_null ? "true" : "false");
131+
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
128132
PRINT_CHILD_VISIT("array", name_ptr, sibling_list_id, "Types", child_list_id);
129-
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array");
133+
SchemaItem* array_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "array", is_nullable);
130134
array_item->children = child_list_id;
131135
}
136+
132137
void visit_map(
133138
void* data,
134139
uintptr_t sibling_list_id,
135140
struct KernelStringSlice name,
136-
bool value_contains_null,
141+
bool is_nullable,
137142
uintptr_t child_list_id)
138143
{
139144
SchemaBuilder* builder = data;
140-
char* name_ptr = malloc(sizeof(char) * (name.len + 24));
145+
char* name_ptr = malloc(sizeof(char) * (name.len + 22));
141146
snprintf(name_ptr, name.len + 1, "%s", name.ptr);
142-
snprintf(name_ptr + name.len, 24, " (contains null: %s)", value_contains_null ? "true" : "false");
147+
snprintf(name_ptr + name.len, 22, " (is nullable: %s)", is_nullable ? "true" : "false");
143148
PRINT_CHILD_VISIT("map", name_ptr, sibling_list_id, "Types", child_list_id);
144-
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map");
149+
SchemaItem* map_item = add_to_list(&builder->lists[sibling_list_id], name_ptr, "map", is_nullable);
145150
map_item->children = child_list_id;
146151
}
147152

148153
void visit_decimal(
149154
void* data,
150155
uintptr_t sibling_list_id,
151156
struct KernelStringSlice name,
157+
bool is_nullable,
152158
uint8_t precision,
153159
uint8_t scale)
154160
{
@@ -157,25 +163,26 @@ void visit_decimal(
157163
char* type = malloc(19 * sizeof(char));
158164
snprintf(type, 19, "decimal(%u)(%d)", precision, scale);
159165
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
160-
add_to_list(&builder->lists[sibling_list_id], name_ptr, type);
166+
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
161167
}
162168

163169
void visit_simple_type(
164170
void* data,
165171
uintptr_t sibling_list_id,
166172
struct KernelStringSlice name,
173+
bool is_nullable,
167174
char* type)
168175
{
169176
SchemaBuilder* builder = data;
170177
char* name_ptr = allocate_string(name);
171178
PRINT_NO_CHILD_VISIT(type, name_ptr, sibling_list_id);
172-
add_to_list(&builder->lists[sibling_list_id], name_ptr, type);
179+
add_to_list(&builder->lists[sibling_list_id], name_ptr, type, is_nullable);
173180
}
174181

175-
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
176-
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name) \
177-
{ \
178-
visit_simple_type(data, sibling_list_id, name, #typename); \
182+
#define DEFINE_VISIT_SIMPLE_TYPE(typename) \
183+
void visit_##typename(void* data, uintptr_t sibling_list_id, struct KernelStringSlice name, bool is_nullable)\
184+
{ \
185+
visit_simple_type(data, sibling_list_id, name, is_nullable, #typename); \
179186
}
180187

181188
DEFINE_VISIT_SIMPLE_TYPE(string)

ffi/src/scan.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ fn kernel_scan_data_next_impl(
230230
.data
231231
.lock()
232232
.map_err(|_| Error::generic("poisoned mutex"))?;
233-
if let Some((data, sel_vec)) = data.next().transpose()? {
233+
if let Some((data, sel_vec, _transforms)) = data.next().transpose()? {
234234
let bool_slice = KernelBoolSlice::from(sel_vec);
235235
(engine_visitor)(engine_context, data.into(), bool_slice);
236236
Ok(true)

ffi/src/schema.rs

+115-38
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use delta_kernel::schema::{ArrayType, DataType, MapType, PrimitiveType, StructTy
2828
/// that element's (already-visited) children.
2929
/// 4. The [`visit_schema`] method returns the id of the list of top-level columns
3030
// WARNING: the visitor MUST NOT retain internal references to the string slices passed to visitor methods
31-
// TODO: struct nullability and field metadata
31+
// TODO: struct field metadata
3232
#[repr(C)]
3333
pub struct EngineSchemaVisitor {
3434
/// opaque state pointer
@@ -43,6 +43,7 @@ pub struct EngineSchemaVisitor {
4343
data: *mut c_void,
4444
sibling_list_id: usize,
4545
name: KernelStringSlice,
46+
is_nullable: bool,
4647
child_list_id: usize,
4748
),
4849

@@ -52,7 +53,7 @@ pub struct EngineSchemaVisitor {
5253
data: *mut c_void,
5354
sibling_list_id: usize,
5455
name: KernelStringSlice,
55-
contains_null: bool, // if this array can contain null values
56+
is_nullable: bool,
5657
child_list_id: usize,
5758
),
5859

@@ -63,7 +64,7 @@ pub struct EngineSchemaVisitor {
6364
data: *mut c_void,
6465
sibling_list_id: usize,
6566
name: KernelStringSlice,
66-
value_contains_null: bool, // if this map can contain null values
67+
is_nullable: bool,
6768
child_list_id: usize,
6869
),
6970

@@ -72,57 +73,106 @@ pub struct EngineSchemaVisitor {
7273
data: *mut c_void,
7374
sibling_list_id: usize,
7475
name: KernelStringSlice,
76+
is_nullable: bool,
7577
precision: u8,
7678
scale: u8,
7779
),
7880

7981
/// Visit a `string` belonging to the list identified by `sibling_list_id`.
80-
pub visit_string:
81-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
82+
pub visit_string: extern "C" fn(
83+
data: *mut c_void,
84+
sibling_list_id: usize,
85+
name: KernelStringSlice,
86+
is_nullable: bool,
87+
),
8288

8389
/// Visit a `long` belonging to the list identified by `sibling_list_id`.
84-
pub visit_long:
85-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
90+
pub visit_long: extern "C" fn(
91+
data: *mut c_void,
92+
sibling_list_id: usize,
93+
name: KernelStringSlice,
94+
is_nullable: bool,
95+
),
8696

8797
/// Visit an `integer` belonging to the list identified by `sibling_list_id`.
88-
pub visit_integer:
89-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
98+
pub visit_integer: extern "C" fn(
99+
data: *mut c_void,
100+
sibling_list_id: usize,
101+
name: KernelStringSlice,
102+
is_nullable: bool,
103+
),
90104

91105
/// Visit a `short` belonging to the list identified by `sibling_list_id`.
92-
pub visit_short:
93-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
106+
pub visit_short: extern "C" fn(
107+
data: *mut c_void,
108+
sibling_list_id: usize,
109+
name: KernelStringSlice,
110+
is_nullable: bool,
111+
),
94112

95113
/// Visit a `byte` belonging to the list identified by `sibling_list_id`.
96-
pub visit_byte:
97-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
114+
pub visit_byte: extern "C" fn(
115+
data: *mut c_void,
116+
sibling_list_id: usize,
117+
name: KernelStringSlice,
118+
is_nullable: bool,
119+
),
98120

99121
/// Visit a `float` belonging to the list identified by `sibling_list_id`.
100-
pub visit_float:
101-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
122+
pub visit_float: extern "C" fn(
123+
data: *mut c_void,
124+
sibling_list_id: usize,
125+
name: KernelStringSlice,
126+
is_nullable: bool,
127+
),
102128

103129
/// Visit a `double` belonging to the list identified by `sibling_list_id`.
104-
pub visit_double:
105-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
130+
pub visit_double: extern "C" fn(
131+
data: *mut c_void,
132+
sibling_list_id: usize,
133+
name: KernelStringSlice,
134+
is_nullable: bool,
135+
),
106136

107137
/// Visit a `boolean` belonging to the list identified by `sibling_list_id`.
108-
pub visit_boolean:
109-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
138+
pub visit_boolean: extern "C" fn(
139+
data: *mut c_void,
140+
sibling_list_id: usize,
141+
name: KernelStringSlice,
142+
is_nullable: bool,
143+
),
110144

111145
/// Visit `binary` belonging to the list identified by `sibling_list_id`.
112-
pub visit_binary:
113-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
146+
pub visit_binary: extern "C" fn(
147+
data: *mut c_void,
148+
sibling_list_id: usize,
149+
name: KernelStringSlice,
150+
is_nullable: bool,
151+
),
114152

115153
/// Visit a `date` belonging to the list identified by `sibling_list_id`.
116-
pub visit_date:
117-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
154+
pub visit_date: extern "C" fn(
155+
data: *mut c_void,
156+
sibling_list_id: usize,
157+
name: KernelStringSlice,
158+
is_nullable: bool,
159+
),
118160

119161
/// Visit a `timestamp` belonging to the list identified by `sibling_list_id`.
120-
pub visit_timestamp:
121-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
162+
pub visit_timestamp: extern "C" fn(
163+
data: *mut c_void,
164+
sibling_list_id: usize,
165+
name: KernelStringSlice,
166+
is_nullable: bool,
167+
),
122168

123169
/// Visit a `timestamp` with no timezone belonging to the list identified by `sibling_list_id`.
124-
pub visit_timestamp_ntz:
125-
extern "C" fn(data: *mut c_void, sibling_list_id: usize, name: KernelStringSlice),
170+
pub visit_timestamp_ntz: extern "C" fn(
171+
data: *mut c_void,
172+
sibling_list_id: usize,
173+
name: KernelStringSlice,
174+
is_nullable: bool,
175+
),
126176
}
127177

128178
/// Visit the schema of the passed `SnapshotHandle`, using the provided `visitor`. See the
@@ -143,28 +193,55 @@ pub unsafe extern "C" fn visit_schema(
143193
fn visit_struct_fields(visitor: &EngineSchemaVisitor, s: &StructType) -> usize {
144194
let child_list_id = (visitor.make_field_list)(visitor.data, s.fields.len());
145195
for field in s.fields() {
146-
visit_schema_item(field.data_type(), field.name(), visitor, child_list_id);
196+
visit_schema_item(
197+
field.name(),
198+
field.data_type(),
199+
field.is_nullable(),
200+
visitor,
201+
child_list_id,
202+
);
147203
}
148204
child_list_id
149205
}
150206

151-
fn visit_array_item(visitor: &EngineSchemaVisitor, at: &ArrayType) -> usize {
207+
fn visit_array_item(
208+
visitor: &EngineSchemaVisitor,
209+
at: &ArrayType,
210+
contains_null: bool,
211+
) -> usize {
152212
let child_list_id = (visitor.make_field_list)(visitor.data, 1);
153-
visit_schema_item(&at.element_type, "array_element", visitor, child_list_id);
213+
visit_schema_item(
214+
"array_element",
215+
&at.element_type,
216+
contains_null,
217+
visitor,
218+
child_list_id,
219+
);
154220
child_list_id
155221
}
156222

157-
fn visit_map_types(visitor: &EngineSchemaVisitor, mt: &MapType) -> usize {
223+
fn visit_map_types(
224+
visitor: &EngineSchemaVisitor,
225+
mt: &MapType,
226+
value_contains_null: bool,
227+
) -> usize {
158228
let child_list_id = (visitor.make_field_list)(visitor.data, 2);
159-
visit_schema_item(&mt.key_type, "map_key", visitor, child_list_id);
160-
visit_schema_item(&mt.value_type, "map_value", visitor, child_list_id);
229+
visit_schema_item("map_key", &mt.key_type, false, visitor, child_list_id);
230+
visit_schema_item(
231+
"map_value",
232+
&mt.value_type,
233+
value_contains_null,
234+
visitor,
235+
child_list_id,
236+
);
161237
child_list_id
162238
}
163239

164240
// Visit a struct field (recursively) and add the result to the list of siblings.
165241
fn visit_schema_item(
166-
data_type: &DataType,
167242
name: &str,
243+
data_type: &DataType,
244+
is_nullable: bool,
168245
visitor: &EngineSchemaVisitor,
169246
sibling_list_id: usize,
170247
) {
@@ -173,7 +250,8 @@ pub unsafe extern "C" fn visit_schema(
173250
(visitor.$visitor_fn)(
174251
visitor.data,
175252
sibling_list_id,
176-
kernel_string_slice!(name)
253+
kernel_string_slice!(name),
254+
is_nullable
177255
$(, $extra_args) *
178256
)
179257
};
@@ -183,12 +261,11 @@ pub unsafe extern "C" fn visit_schema(
183261
DataType::Map(mt) => {
184262
call!(
185263
visit_map,
186-
mt.value_contains_null,
187-
visit_map_types(visitor, mt)
264+
visit_map_types(visitor, mt, mt.value_contains_null)
188265
)
189266
}
190267
DataType::Array(at) => {
191-
call!(visit_array, at.contains_null, visit_array_item(visitor, at))
268+
call!(visit_array, visit_array_item(visitor, at, at.contains_null))
192269
}
193270
DataType::Primitive(PrimitiveType::Decimal(precision, scale)) => {
194271
call!(visit_decimal, *precision, *scale)

kernel/examples/inspect-table/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ fn try_main() -> DeltaResult<()> {
209209
let scan = ScanBuilder::new(snapshot).build()?;
210210
let scan_data = scan.scan_data(&engine)?;
211211
for res in scan_data {
212-
let (data, vector) = res?;
212+
let (data, vector, _transforms) = res?;
213213
delta_kernel::scan::state::visit_scan_files(
214214
data.as_ref(),
215215
&vector,

kernel/examples/read-table-multi-threaded/src/main.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ fn try_main() -> DeltaResult<()> {
210210
drop(record_batch_tx);
211211

212212
for res in scan_data {
213-
let (data, vector) = res?;
213+
let (data, vector, _transforms) = res?;
214214
scan_file_tx = delta_kernel::scan::state::visit_scan_files(
215215
data.as_ref(),
216216
&vector,

0 commit comments

Comments
 (0)