@@ -6,6 +6,7 @@ use crate::{
6
6
core:: { ArrayVector , FlatVector , Inserter , ListVector , StructVector , Vector } ,
7
7
types:: DuckString ,
8
8
} ;
9
+ use arrow:: array:: as_map_array;
9
10
use arrow:: {
10
11
array:: {
11
12
as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array,
@@ -204,6 +205,7 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result<LogicalTypeHandle,
204
205
// DuckDB does not support negative decimal scales
205
206
Ok ( LogicalTypeHandle :: decimal ( * width, ( * scale) . try_into ( ) . unwrap ( ) ) )
206
207
}
208
+ DataType :: Map ( field, _) => arrow_map_to_duckdb_logical_type ( field) ,
207
209
DataType :: Boolean
208
210
| DataType :: Utf8
209
211
| DataType :: LargeUtf8
@@ -220,6 +222,35 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result<LogicalTypeHandle,
220
222
}
221
223
}
222
224
225
+ fn arrow_map_to_duckdb_logical_type ( field : & FieldRef ) -> Result < LogicalTypeHandle , Box < dyn std:: error:: Error > > {
226
+ // Map is a logical nested type that is represented as `List<entries: Struct<key: K, value: V>>`
227
+ let DataType :: Struct ( ref fields) = field. data_type ( ) else {
228
+ return Err ( format ! (
229
+ "The inner field of a Map must be a Struct, got: {:?}" ,
230
+ field. data_type( )
231
+ )
232
+ . into ( ) ) ;
233
+ } ;
234
+
235
+ if fields. len ( ) != 2 {
236
+ return Err ( format ! (
237
+ "The inner Struct field of a Map must have 2 fields, got {} fields" ,
238
+ fields. len( )
239
+ )
240
+ . into ( ) ) ;
241
+ }
242
+
243
+ let ( Some ( key_field) , Some ( value_field) ) = ( fields. first ( ) , fields. get ( 1 ) ) else {
244
+ // number of fields is verified above
245
+ unreachable ! ( )
246
+ } ;
247
+
248
+ Ok ( LogicalTypeHandle :: map (
249
+ & LogicalTypeHandle :: from ( to_duckdb_type_id ( key_field. data_type ( ) ) ?) ,
250
+ & LogicalTypeHandle :: from ( to_duckdb_type_id ( value_field. data_type ( ) ) ?) ,
251
+ ) )
252
+ }
253
+
223
254
// FIXME: flat vectors don't have all of these types. I think they only
224
255
/// Converts flat vector to an arrow array
225
256
pub fn flat_vector_to_arrow_array (
@@ -586,6 +617,19 @@ pub fn write_arrow_array_to_vector(
586
617
let mut struct_vector = chunk. struct_vector ( ) ;
587
618
struct_array_to_vector ( struct_array, & mut struct_vector) ?;
588
619
}
620
+ DataType :: Map ( _, _) => {
621
+ // [`MapArray`] is physically a [`ListArray`] of key values pairs stored as an `entries` [`StructArray`] with 2 child fields.
622
+ let map_array = as_map_array ( col. as_ref ( ) ) ;
623
+ let out = & mut chunk. list_vector ( ) ;
624
+ struct_array_to_vector ( map_array. entries ( ) , & mut out. struct_child ( map_array. entries ( ) . len ( ) ) ) ?;
625
+
626
+ for i in 0 ..map_array. len ( ) {
627
+ let offset = map_array. value_offsets ( ) [ i] ;
628
+ let length = map_array. value_length ( i) ;
629
+ out. set_entry ( i, offset. as_ ( ) , length. as_ ( ) ) ;
630
+ }
631
+ set_nulls_in_list_vector ( map_array, out) ;
632
+ }
589
633
dt => {
590
634
return Err ( format ! (
591
635
"column with data_type {} is not supported yet, please file an issue https://github.com/wangfenjin/duckdb-rs" ,
@@ -935,7 +979,10 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
935
979
fixed_size_list_array_to_vector ( as_fixed_size_list_array ( value_array. as_ref ( ) ) , & mut out. array_child ( ) ) ?;
936
980
}
937
981
DataType :: Struct ( _) => {
938
- struct_array_to_vector ( as_struct_array ( value_array. as_ref ( ) ) , & mut out. struct_child ( ) ) ?;
982
+ struct_array_to_vector (
983
+ as_struct_array ( value_array. as_ref ( ) ) ,
984
+ & mut out. struct_child ( value_array. len ( ) ) ,
985
+ ) ?;
939
986
}
940
987
_ => {
941
988
return Err ( format ! (
@@ -993,13 +1040,13 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result
993
1040
let column = array. column ( i) ;
994
1041
match column. data_type ( ) {
995
1042
dt if dt. is_primitive ( ) || matches ! ( dt, DataType :: Boolean ) => {
996
- primitive_array_to_vector ( column, & mut out. child ( i) ) ?;
1043
+ primitive_array_to_vector ( column, & mut out. child ( i, array . len ( ) ) ) ?;
997
1044
}
998
1045
DataType :: Utf8 => {
999
- string_array_to_vector ( as_string_array ( column. as_ref ( ) ) , & mut out. child ( i) ) ;
1046
+ string_array_to_vector ( as_string_array ( column. as_ref ( ) ) , & mut out. child ( i, array . len ( ) ) ) ;
1000
1047
}
1001
1048
DataType :: Binary => {
1002
- binary_array_to_vector ( as_generic_binary_array ( column. as_ref ( ) ) , & mut out. child ( i) ) ;
1049
+ binary_array_to_vector ( as_generic_binary_array ( column. as_ref ( ) ) , & mut out. child ( i, array . len ( ) ) ) ;
1003
1050
}
1004
1051
DataType :: List ( _) => {
1005
1052
list_array_to_vector ( as_list_array ( column. as_ref ( ) ) , & mut out. list_vector_child ( i) ) ?;
@@ -1112,10 +1159,10 @@ mod test {
1112
1159
Array , ArrayRef , AsArray , BinaryArray , BinaryViewArray , BooleanArray , Date32Array , Date64Array ,
1113
1160
Decimal128Array , Decimal256Array , DurationSecondArray , FixedSizeListArray , FixedSizeListBuilder ,
1114
1161
GenericByteArray , GenericListArray , Int32Array , Int32Builder , IntervalDayTimeArray ,
1115
- IntervalMonthDayNanoArray , IntervalYearMonthArray , LargeStringArray , ListArray , ListBuilder ,
1162
+ IntervalMonthDayNanoArray , IntervalYearMonthArray , LargeStringArray , ListArray , ListBuilder , MapArray ,
1116
1163
OffsetSizeTrait , PrimitiveArray , StringArray , StringViewArray , StructArray , Time32SecondArray ,
1117
1164
Time64MicrosecondArray , TimestampMicrosecondArray , TimestampMillisecondArray , TimestampNanosecondArray ,
1118
- TimestampSecondArray ,
1165
+ TimestampSecondArray , UInt32Array ,
1119
1166
} ,
1120
1167
buffer:: { OffsetBuffer , ScalarBuffer } ,
1121
1168
datatypes:: {
@@ -1894,4 +1941,66 @@ mod test {
1894
1941
1895
1942
Ok ( ( ) )
1896
1943
}
1944
+
1945
+ fn check_map_array_roundtrip ( array : MapArray ) -> Result < ( ) , Box < dyn Error > > {
1946
+ let expected = array. clone ( ) ;
1947
+
1948
+ let db = Connection :: open_in_memory ( ) ?;
1949
+ db. register_table_function :: < ArrowVTab > ( "arrow" ) ?;
1950
+
1951
+ // Roundtrip a record batch from Rust to DuckDB and back to Rust
1952
+ let schema = Schema :: new ( vec ! [ Field :: new( "a" , array. data_type( ) . clone( ) , true ) ] ) ;
1953
+
1954
+ let rb = RecordBatch :: try_new ( Arc :: new ( schema) , vec ! [ Arc :: new( array. clone( ) ) ] ) ?;
1955
+ let param = arrow_recordbatch_to_query_params ( rb. clone ( ) ) ;
1956
+ let mut stmt = db. prepare ( "select a from arrow(?, ?)" ) ?;
1957
+ let rb = stmt. query_arrow ( param) ?. next ( ) . expect ( "no record batch" ) ;
1958
+ let output_array = rb
1959
+ . column ( 0 )
1960
+ . as_any ( )
1961
+ . downcast_ref :: < MapArray > ( )
1962
+ . expect ( "Expected MapArray" ) ;
1963
+
1964
+ assert_eq ! ( output_array. keys( ) , expected. keys( ) ) ;
1965
+ assert_eq ! ( output_array. values( ) , expected. values( ) ) ;
1966
+
1967
+ Ok ( ( ) )
1968
+ }
1969
+
1970
/// Round-trips two map arrays through DuckDB: a small hand-written one and a
/// 4000-entry generated one.
#[test]
fn test_map_roundtrip() -> Result<(), Box<dyn Error>> {
    // Case 1: eight entries grouped as [[a, b, c], [d, e, f], [g, h]], with two null values.
    let small_keys = ["a", "b", "c", "d", "e", "f", "g", "h"];
    let small_values = UInt32Array::from(vec![
        Some(0u32),
        None,
        Some(20),
        Some(30),
        None,
        Some(50),
        Some(60),
        Some(70),
    ]);
    let small_offsets: [u32; 4] = [0, 3, 6, 8];
    let small_map = MapArray::new_from_strings(small_keys.iter().copied(), &small_values, &small_offsets).unwrap();
    check_map_array_roundtrip(small_map)?;

    // Case 2: 4000 entries, intended to exercise buffer capacity adjustment.
    // Every fifth value is null; entries are grouped in threes, and the final
    // offset closes the last (shorter) group.
    const ENTRY_COUNT: u32 = 4000;
    let large_keys: Vec<String> = (0..ENTRY_COUNT).map(|i| format!("key-{}", i)).collect();
    let large_values = UInt32Array::from(
        (0..ENTRY_COUNT)
            .map(|i| (i % 5 != 0).then_some(i))
            .collect::<Vec<_>>(),
    );
    let large_offsets: Vec<u32> = (0..=ENTRY_COUNT)
        .step_by(3)
        .chain(std::iter::once(ENTRY_COUNT))
        .collect();
    let large_map = MapArray::new_from_strings(large_keys.iter().map(String::as_str), &large_values, &large_offsets)
        .unwrap();
    check_map_array_roundtrip(large_map)?;

    Ok(())
}
1897
2006
}
0 commit comments