Skip to content

Commit

Permalink
GH-43994: [C++][Parquet] Fix schema conversion from two-level encoding nested list
Browse files Browse the repository at this point in the history
  • Loading branch information
wgtmac committed Sep 20, 2024
1 parent 25d42b9 commit 1bc2d11
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 0 deletions.
20 changes: 20 additions & 0 deletions cpp/src/parquet/arrow/arrow_schema_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,26 @@ TEST_F(TestConvertParquetSchema, ParquetLists) {
arrow_fields.push_back(::arrow::field("name", arrow_list, false));
}

// Two-level encoding of List<List<Integer>>. Both list levels use a bare
// repeated node named "array" instead of an intermediate wrapper group:
//
//   optional group nested_list (LIST) {
//     repeated group array (LIST) {
//       repeated int32 array;
//     }
//   }
//
// The scoped block below appends this Parquet node to `parquet_fields` and the
// Arrow field it is expected to convert to onto `arrow_fields`.
{
// Parquet side: innermost repeated int32 leaf.
auto inner_element =
PrimitiveNode::Make("array", Repetition::REPEATED, ParquetType::INT32);
// Parquet side: repeated LIST group whose only child is the repeated leaf.
auto outer_element = GroupNode::Make("array", Repetition::REPEATED, {inner_element},
ConvertedType::LIST);
// Parquet side: optional top-level LIST group wrapping the repeated group.
parquet_fields.push_back(GroupNode::Make("nested_list", Repetition::OPTIONAL,
{outer_element}, ConvertedType::LIST));
// Arrow side: both "array" levels are expected as non-nullable fields; only
// the top-level "nested_list" field is expected to be nullable.
auto arrow_inner_element = ::arrow::field("array", INT32, /*nullable=*/false);
auto arrow_outer_element =
::arrow::field("array", ::arrow::list(arrow_inner_element), /*nullable=*/false);
auto arrow_list = ::arrow::list(arrow_outer_element);
arrow_fields.push_back(::arrow::field("nested_list", arrow_list, true));
}

auto arrow_schema = ::arrow::schema(arrow_fields);
ASSERT_OK(ConvertSchema(parquet_fields));

Expand Down
4 changes: 4 additions & 0 deletions cpp/src/parquet/arrow/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -681,6 +681,10 @@ Status ListToSchemaField(const GroupNode& group, LevelInfo current_levels,
// List of primitive type
RETURN_NOT_OK(
NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
} else if (list_group.field_count() == 1 && list_group.field(0)->is_repeated()) {
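// Special case for a nested list in two-level list encoding: the repeated
// group has exactly one child and that child is itself repeated, so convert
// the child directly instead of wrapping the group in a struct.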
RETURN_NOT_OK(
NodeToSchemaField(*list_group.field(0), current_levels, ctx, out, child_field));
} else {
RETURN_NOT_OK(GroupToStruct(list_group, current_levels, ctx, out, child_field));
}
Expand Down

0 comments on commit 1bc2d11

Please sign in to comment.