diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 31ead461aa6e2..9350922b2053e 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -757,14 +757,13 @@ TEST_F(TestConvertParquetSchema, ParquetSchemaArrowExtensions) { { // Parquet file does not contain Arrow schema. - // If Arrow extensions are enabled, both fields should be treated as json() extension - // fields. + // If Arrow extensions are enabled, fields will be interpreted as json(utf8()) + // extension fields. ArrowReaderProperties props; props.set_arrow_extensions_enabled(true); auto arrow_schema = ::arrow::schema( {::arrow::field("json_1", ::arrow::extension::json(), true), - ::arrow::field("json_2", ::arrow::extension::json(::arrow::large_utf8()), - true)}); + ::arrow::field("json_2", ::arrow::extension::json(::arrow::utf8()), true)}); std::shared_ptr metadata{}; ASSERT_OK(ConvertSchema(parquet_fields, metadata, props)); CheckFlatSchema(arrow_schema); @@ -772,8 +771,8 @@ TEST_F(TestConvertParquetSchema, ParquetSchemaArrowExtensions) { { // Parquet file contains Arrow schema. - // Both json_1 and json_2 should be returned as a json() field - // even though extensions are not enabled. + // json_1 and json_2 will be interpreted as json(utf8()) and json(large_utf8()) + // fields even though extensions are not enabled. ArrowReaderProperties props; props.set_arrow_extensions_enabled(false); std::shared_ptr field_metadata = @@ -791,7 +790,7 @@ TEST_F(TestConvertParquetSchema, ParquetSchemaArrowExtensions) { { // Parquet file contains Arrow schema. Extensions are enabled. - // Both json_1 and json_2 should be returned as a json() field + // json_1 and json_2 will be interpreted as json(utf8()) and json(large_utf8()). 
ArrowReaderProperties props; props.set_arrow_extensions_enabled(true); std::shared_ptr field_metadata = diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index 1623d80dcb0e4..ef175668b0bcc 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -1017,7 +1017,9 @@ Result ApplyOriginalMetadata(const Field& origin_field, SchemaField* infer // Restore extension type, if the storage type is the same as inferred // from the Parquet type - if (ex_type.storage_type()->Equals(*inferred->field->type())) { + // Exception: arrow.json is restored regardless of its storage type. + if (ex_type.storage_type()->Equals(*inferred->field->type()) || + ex_type.extension_name() == "arrow.json") { inferred->field = inferred->field->WithType(origin_type); } }