diff --git a/.github/workflows/sanitizer_test.yml b/.github/workflows/sanitizer_test.yml index 6c9c50e..51fee8a 100644 --- a/.github/workflows/sanitizer_test.yml +++ b/.github/workflows/sanitizer_test.yml @@ -50,9 +50,9 @@ jobs: - name: Run Tests working-directory: build env: - ASAN_OPTIONS: log_path=out.log:detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=0:detect_container_overflow=0 + ASAN_OPTIONS: log_path=out.log:detect_leaks=1:symbolize=1:strict_string_checks=1:halt_on_error=1:detect_container_overflow=0 LSAN_OPTIONS: suppressions=${{ github.workspace }}/.github/lsan-suppressions.txt - UBSAN_OPTIONS: log_path=out.log:halt_on_error=0:print_stacktrace=1:suppressions=${{ github.workspace }}/.github/ubsan-suppressions.txt + UBSAN_OPTIONS: log_path=out.log:halt_on_error=1:print_stacktrace=1:suppressions=${{ github.workspace }}/.github/ubsan-suppressions.txt run: | ctest --output-on-failure - name: Save the test output diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc index 6218072..1ce279f 100644 --- a/src/iceberg/schema_internal.cc +++ b/src/iceberg/schema_internal.cc @@ -50,11 +50,9 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n switch (type.type_id()) { case TypeId::kStruct: { - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRUCT)); - const auto& struct_type = static_cast(type); const auto& fields = struct_type.fields(); - NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, fields.size())); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeStruct(schema, fields.size())); for (size_t i = 0; i < fields.size(); i++) { const auto& field = fields[i]; @@ -64,7 +62,7 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n } } break; case TypeId::kList: { - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_LIST)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_LIST)); const auto& list_type = static_cast(type); const auto& elem_field = list_type.fields()[0]; @@ -73,7 +71,7 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n schema->children[0])); } break; case TypeId::kMap: { - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_MAP)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_MAP)); const auto& map_type = static_cast(type); const auto& key_field = map_type.key(); @@ -86,61 +84,55 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool optional, std::string_view n schema->children[0]->children[1])); } break; case TypeId::kBoolean: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_BOOL)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BOOL)); break; case TypeId::kInt: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT32)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT32)); break; case TypeId::kLong: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_INT64)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_INT64)); break; case TypeId::kFloat: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_FLOAT)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_FLOAT)); break; case TypeId::kDouble: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_DOUBLE)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DOUBLE)); break; case TypeId::kDecimal: { - ArrowSchemaInit(schema); const auto& decimal_type = static_cast(type); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL128, decimal_type.precision(), decimal_type.scale())); } break; case TypeId::kDate: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_DATE32)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_DATE32)); break; case TypeId::kTime: { - ArrowSchemaInit(schema); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIME64, NANOARROW_TIME_UNIT_MICRO, /*timezone=*/nullptr)); } break; case TypeId::kTimestamp: { - ArrowSchemaInit(schema); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, /*timezone=*/nullptr)); } break; case TypeId::kTimestampTz: { - ArrowSchemaInit(schema); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime( schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, "UTC")); } break; case TypeId::kString: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_STRING)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_STRING)); break; case TypeId::kBinary: - NANOARROW_RETURN_NOT_OK(ArrowSchemaInitFromType(schema, NANOARROW_TYPE_BINARY)); + NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema, NANOARROW_TYPE_BINARY)); break; case TypeId::kFixed: { - ArrowSchemaInit(schema); const auto& fixed_type = static_cast(type); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeFixedSize( schema, NANOARROW_TYPE_FIXED_SIZE_BINARY, fixed_type.length())); } break; case TypeId::kUuid: { - ArrowSchemaInit(schema); NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeFixedSize( schema, NANOARROW_TYPE_FIXED_SIZE_BINARY, /*fixed_size=*/16)); NANOARROW_RETURN_NOT_OK( @@ -173,6 +165,8 @@ Status ToArrowSchema(const Schema& schema, ArrowSchema* out) { return InvalidArgument("Output Arrow schema cannot be null"); } + ArrowSchemaInit(out); + if (ArrowErrorCode errorCode = ToArrowSchema(schema, /*optional=*/false, /*name=*/"", /*field_id=*/std::nullopt, out); errorCode != NANOARROW_OK) { diff --git a/test/arrow_test.cc b/test/arrow_test.cc index 52cef04..e8714e9 100644 --- a/test/arrow_test.cc +++ b/test/arrow_test.cc @@ -283,6 +283,7 @@ TEST_P(FromArrowSchemaTest, PrimitiveType) { auto type_result = FromArrowSchema(exported_schema, /*schema_id=*/1); ASSERT_THAT(type_result, IsOk()); + ArrowSchemaRelease(&exported_schema); const auto& schema = type_result.value(); ASSERT_EQ(schema->schema_id(), 1); @@ -358,6 +359,7 @@ TEST(FromArrowSchemaTest, StructType) { auto schema_result = FromArrowSchema(exported_schema, /*schema_id=*/0); ASSERT_THAT(schema_result, IsOk()); + ArrowSchemaRelease(&exported_schema); const auto& iceberg_schema = schema_result.value(); ASSERT_EQ(iceberg_schema->schema_id(), 0); @@ -408,6 +410,7 @@ TEST(FromArrowSchemaTest, ListType) { auto schema_result = FromArrowSchema(exported_schema, /*schema_id=*/0); ASSERT_THAT(schema_result, IsOk()); + ArrowSchemaRelease(&exported_schema); const auto& iceberg_schema = schema_result.value(); ASSERT_EQ(iceberg_schema->schema_id(), 0); @@ -458,6 +461,7 @@ TEST(FromArrowSchemaTest, MapType) { auto schema_result = FromArrowSchema(exported_schema, /*schema_id=*/0); ASSERT_THAT(schema_result, IsOk()); + ArrowSchemaRelease(&exported_schema); const auto& iceberg_schema = schema_result.value(); ASSERT_EQ(iceberg_schema->schema_id(), 0);