Skip to content

Commit 29c356f

Browse files
committed
chore
1 parent b95a13c commit 29c356f

File tree

2 files changed

+10
-41
lines changed
  • datafusion

2 files changed

+10
-41
lines changed

datafusion/functions-nested/src/map.rs

Lines changed: 9 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -66,15 +66,6 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
6666

6767
match keys_arg {
6868
ColumnarValue::Array(_) => {
69-
// For array inputs, keys is a List array where each element represents
70-
// the keys of one map. Some list elements may be NULL, which represents
71-
// a NULL map (not a null key within a map).
72-
//
73-
// We should NOT check keys.null_count() here because:
74-
// - list_to_arrays() will flatten the list and skip NULL elements (via flatten())
75-
// - Those NULL elements represent NULL maps, which are valid
76-
// - Null keys WITHIN maps are checked later in check_unique_keys() and
77-
// make_map_array_internal()
7869
let row_keys = match key_array.data_type() {
7970
DataType::List(_) => list_to_arrays::<i32>(&keys),
8071
DataType::LargeList(_) => list_to_arrays::<i64>(&keys),
@@ -87,36 +78,12 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
8778
}
8879
};
8980

90-
// Check that keys within each map are unique and non-null
91-
// Note: row_keys only contains non-NULL map entries due to flatten()
9281
row_keys
9382
.iter()
94-
.try_for_each(|key| check_unique_keys(key.as_ref()))?;
83+
.try_for_each(|key| validate_map_keys(key.as_ref()))?;
9584
}
9685
ColumnarValue::Scalar(_) => {
97-
// For scalar inputs, process based on data type
98-
match key_array.data_type() {
99-
DataType::List(_)
100-
| DataType::LargeList(_)
101-
| DataType::FixedSizeList(_, _) => {
102-
// The scalar wraps a List<List<T>> which represents multiple maps
103-
// (e.g., from make_array(['a','b'], NULL, ['c']) in constant evaluation)
104-
// Some list elements may be NULL (representing NULL maps), so we use list_to_arrays
105-
let row_keys = match key_array.data_type() {
106-
DataType::List(_) => list_to_arrays::<i32>(&keys),
107-
DataType::LargeList(_) => list_to_arrays::<i64>(&keys),
108-
DataType::FixedSizeList(_, _) => fixed_size_list_to_arrays(&keys),
109-
_ => unreachable!(),
110-
};
111-
row_keys
112-
.iter()
113-
.try_for_each(|key| check_unique_keys(key.as_ref()))?;
114-
}
115-
_ => {
116-
// For non-list scalars (e.g., a single array of keys), check directly
117-
check_unique_keys(key_array)?;
118-
}
119-
}
86+
validate_map_keys(key_array)?;
12087
}
12188
}
12289

@@ -136,15 +103,19 @@ fn make_map_batch(args: &[ColumnarValue]) -> Result<ColumnarValue> {
136103
make_map_batch_internal(keys, values, can_evaluate_to_const, keys_arg.data_type())
137104
}
138105

139-
fn check_unique_keys(array: &dyn Array) -> Result<()> {
106+
/// Validates that map keys are non-null and unique.
107+
fn validate_map_keys(array: &dyn Array) -> Result<()> {
140108
let mut seen_keys = HashSet::with_capacity(array.len());
141109

142110
for i in 0..array.len() {
143111
let key = ScalarValue::try_from_array(array, i)?;
144-
// Map keys cannot be null
112+
113+
// Validation 1: Map keys cannot be null
145114
if key.is_null() {
146115
return exec_err!("map key cannot be null");
147116
}
117+
118+
// Validation 2: Map keys must be unique
148119
if seen_keys.contains(&key) {
149120
return exec_err!("map key must be unique, duplicate key found: {}", key);
150121
}
@@ -428,9 +399,7 @@ fn make_map_array_internal<O: OffsetSizeTrait>(
428399
let mut non_null_idx = 0;
429400

430401
for i in 0..original_len {
431-
let is_null = nulls_bitmap
432-
.as_ref()
433-
.map_or(false, |nulls| nulls.is_null(i));
402+
let is_null = nulls_bitmap.as_ref().is_some_and(|nulls| nulls.is_null(i));
434403
if is_null {
435404
// NULL map: offset doesn't advance (empty range)
436405
offset_buffer.push(running_offset);

datafusion/sqllogictest/test_files/map.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,7 @@ SELECT map(column5, column6) FROM duplicate_keys_table;
233233

234234
# key is a nested type
235235
query error DataFusion error: Execution error: map key must be unique, duplicate key found: \[1, 2\]
236-
SELECT MAP([[1,2], [1,2], [NULL]], [41, 33, null]);
236+
SELECT MAP([[1,2], [1,2]], [41, 33]);
237237

238238
query error DataFusion error: Execution error: map key must be unique, duplicate key found: \[\{1:1\}\]
239239
SELECT MAP([Map {1:'1'}, Map {1:'1'}, Map {2:'2'}], [41, 33, null]);

0 commit comments

Comments
 (0)