@@ -95,6 +95,8 @@ struct ArrowIpcDecoderPrivate {
95
95
// The number of buffers that future RecordBatch messages must have to match the schema
96
96
// that has been set.
97
97
int64_t n_buffers ;
98
+ // The number of union fields in the Schema.
99
+ int64_t n_union_fields ;
98
100
// A pointer to the last flatbuffers message.
99
101
const void * last_message ;
100
102
// Storage for a Footer
@@ -261,6 +263,8 @@ void ArrowIpcDecoderReset(struct ArrowIpcDecoder* decoder) {
261
263
private_data -> n_fields = 0 ;
262
264
}
263
265
266
+ private_data -> n_union_fields = 0 ;
267
+
264
268
ArrowIpcFooterReset (& private_data -> footer );
265
269
266
270
ArrowFree (private_data );
@@ -924,9 +928,16 @@ static int ArrowIpcDecoderDecodeRecordBatchHeader(struct ArrowIpcDecoder* decode
924
928
return EINVAL ;
925
929
}
926
930
927
- if ((n_buffers + 1 ) != private_data -> n_buffers ) {
931
+ int64_t n_expected_buffers = private_data -> n_buffers ;
932
+ if (decoder -> metadata_version < NANOARROW_IPC_METADATA_VERSION_V5 ) {
933
+ // Unions had null buffers before arrow 1.0, so expect one extra buffer per union
934
+ // field
935
+ n_expected_buffers += private_data -> n_union_fields ;
936
+ }
937
+
938
+ if ((n_buffers + 1 ) != n_expected_buffers ) {
928
939
ArrowErrorSet (error , "Expected %" PRId64 " buffers in message but found %" PRId64 ,
929
- private_data -> n_buffers - 1 , n_buffers );
940
+ n_expected_buffers - 1 , n_buffers );
930
941
return EINVAL ;
931
942
}
932
943
@@ -1179,14 +1190,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct ArrowIpcDecoder* decoder,
1179
1190
1180
1191
switch (decoder -> metadata_version ) {
1181
1192
case ns (MetadataVersion_V5 ):
1193
+ case ns (MetadataVersion_V4 ):
1182
1194
break ;
1183
1195
case ns (MetadataVersion_V1 ):
1184
1196
case ns (MetadataVersion_V2 ):
1185
1197
case ns (MetadataVersion_V3 ):
1186
- case ns (MetadataVersion_V4 ):
1187
- ArrowErrorSet (error , "Expected metadata version V5 but found %s" ,
1198
+ ArrowErrorSet (error , "Expected metadata version V4 or V5 but found %s" ,
1188
1199
ns (MetadataVersion_name (ns (Message_version (message )))));
1189
- break ;
1200
+ return EINVAL ;
1190
1201
default :
1191
1202
ArrowErrorSet (error , "Unexpected value for Message metadata version (%d)" ,
1192
1203
decoder -> metadata_version );
@@ -1307,7 +1318,7 @@ static void ArrowIpcDecoderCountFields(struct ArrowSchema* schema, int64_t* n_fi
1307
1318
static void ArrowIpcDecoderInitFields (struct ArrowIpcField * fields ,
1308
1319
struct ArrowArrayView * array_view ,
1309
1320
struct ArrowArray * array , int64_t * n_fields ,
1310
- int64_t * n_buffers ) {
1321
+ int64_t * n_buffers , int64_t * n_union_fields ) {
1311
1322
struct ArrowIpcField * field = fields + (* n_fields );
1312
1323
field -> array_view = array_view ;
1313
1324
field -> array = array ;
@@ -1316,12 +1327,14 @@ static void ArrowIpcDecoderInitFields(struct ArrowIpcField* fields,
1316
1327
for (int i = 0 ; i < NANOARROW_MAX_FIXED_BUFFERS ; i ++ ) {
1317
1328
* n_buffers += array_view -> layout .buffer_type [i ] != NANOARROW_BUFFER_TYPE_NONE ;
1318
1329
}
1330
+ * n_union_fields += array_view -> storage_type == NANOARROW_TYPE_SPARSE_UNION ||
1331
+ array_view -> storage_type == NANOARROW_TYPE_DENSE_UNION ;
1319
1332
1320
1333
* n_fields += 1 ;
1321
1334
1322
1335
for (int64_t i = 0 ; i < array_view -> n_children ; i ++ ) {
1323
1336
ArrowIpcDecoderInitFields (fields , array_view -> children [i ], array -> children [i ],
1324
- n_fields , n_buffers );
1337
+ n_fields , n_buffers , n_union_fields );
1325
1338
}
1326
1339
}
1327
1340
@@ -1334,6 +1347,7 @@ ArrowErrorCode ArrowIpcDecoderSetSchema(struct ArrowIpcDecoder* decoder,
1334
1347
// Reset previously allocated schema-specific resources
1335
1348
private_data -> n_buffers = 0 ;
1336
1349
private_data -> n_fields = 0 ;
1350
+ private_data -> n_union_fields = 0 ;
1337
1351
ArrowArrayViewReset (& private_data -> array_view );
1338
1352
if (private_data -> array .release != NULL ) {
1339
1353
ArrowArrayRelease (& private_data -> array );
@@ -1368,7 +1382,8 @@ ArrowErrorCode ArrowIpcDecoderSetSchema(struct ArrowIpcDecoder* decoder,
1368
1382
// Init field information and calculate starting buffer offset for each
1369
1383
int64_t field_i = 0 ;
1370
1384
ArrowIpcDecoderInitFields (private_data -> fields , & private_data -> array_view ,
1371
- & private_data -> array , & field_i , & private_data -> n_buffers );
1385
+ & private_data -> array , & field_i , & private_data -> n_buffers ,
1386
+ & private_data -> n_union_fields );
1372
1387
1373
1388
return NANOARROW_OK ;
1374
1389
}
@@ -1604,6 +1619,7 @@ struct ArrowIpcArraySetter {
1604
1619
int64_t body_size_bytes ;
1605
1620
struct ArrowIpcBufferSource src ;
1606
1621
struct ArrowIpcBufferFactory factory ;
1622
+ enum ArrowIpcMetadataVersion version ;
1607
1623
};
1608
1624
1609
1625
static int ArrowIpcDecoderMakeBuffer (struct ArrowIpcArraySetter * setter , int64_t offset ,
@@ -1691,6 +1707,21 @@ static int ArrowIpcDecoderWalkSetArrayView(struct ArrowIpcArraySetter* setter,
1691
1707
array_view -> null_count = ns (FieldNode_null_count (field ));
1692
1708
setter -> field_i += 1 ;
1693
1709
1710
+ if (array_view -> storage_type == NANOARROW_TYPE_SPARSE_UNION ||
1711
+ array_view -> storage_type == NANOARROW_TYPE_DENSE_UNION ) {
1712
+ if (setter -> version < NANOARROW_IPC_METADATA_VERSION_V5 ) {
1713
+ ns (Buffer_struct_t ) buffer =
1714
+ ns (Buffer_vec_at (setter -> buffers , (size_t )setter -> buffer_i ));
1715
+ if (ns (Buffer_length (buffer )) != 0 ) {
1716
+ ArrowErrorSet (error ,
1717
+ "Cannot read pre-1.0.0 Union array with top-level validity bitmap" );
1718
+ return EINVAL ;
1719
+ }
1720
+ // skip the empty validity bitmap
1721
+ setter -> buffer_i += 1 ;
1722
+ }
1723
+ }
1724
+
1694
1725
for (int i = 0 ; i < NANOARROW_MAX_FIXED_BUFFERS ; i ++ ) {
1695
1726
if (array_view -> layout .buffer_type [i ] == NANOARROW_BUFFER_TYPE_NONE ) {
1696
1727
break ;
@@ -1803,6 +1834,7 @@ static ArrowErrorCode ArrowIpcDecoderDecodeArrayViewInternal(
1803
1834
setter .factory = factory ;
1804
1835
setter .src .codec = decoder -> codec ;
1805
1836
setter .src .swap_endian = ArrowIpcDecoderNeedsSwapEndian (decoder );
1837
+ setter .version = decoder -> metadata_version ;
1806
1838
1807
1839
// The flatbuffers FieldNode doesn't count the root struct so we have to loop over the
1808
1840
// children ourselves
0 commit comments