From a78e3581ce4ff9b3595f645a6e56e2e1ca64b077 Mon Sep 17 00:00:00 2001 From: Marc Droogh Date: Tue, 10 Dec 2024 22:31:26 +0100 Subject: [PATCH] Fix compatibility changes schema handling apache-avro 0.17 - Handle ArraySchema struct - Handle MapSchema struct - Map BigDecimal => LargeBinary - Map TimestampNanos => Timestamp(TimeUnit::Nanosecond, None) - Map LocalTimestampNanos => todo!() - Add Default to FixedSchema test --- .../src/datasource/avro_to_arrow/arrow_array_reader.rs | 4 ++-- datafusion/core/src/datasource/avro_to_arrow/schema.rs | 8 ++++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs index 9f089c7c0cea8..a266ea05be4e7 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/arrow_array_reader.rs @@ -35,7 +35,7 @@ use crate::arrow::error::ArrowError; use crate::arrow::record_batch::RecordBatch; use crate::arrow::util::bit_util; use crate::error::{DataFusionError, Result}; -use apache_avro::schema::RecordSchema; +use apache_avro::schema::{ArraySchema, RecordSchema}; use apache_avro::{ schema::{Schema as AvroSchema, SchemaKind}, types::Value, @@ -138,7 +138,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> { } AvroSchema::Array(schema) => { let sub_parent_field_name = format!("{}.element", parent_field_name); - Self::child_schema_lookup(&sub_parent_field_name, schema, schema_lookup)?; + Self::child_schema_lookup(&sub_parent_field_name, &schema.items, schema_lookup)?; } _ => (), } diff --git a/datafusion/core/src/datasource/avro_to_arrow/schema.rs b/datafusion/core/src/datasource/avro_to_arrow/schema.rs index 039a6aacc07eb..263f605701333 100644 --- a/datafusion/core/src/datasource/avro_to_arrow/schema.rs +++ b/datafusion/core/src/datasource/avro_to_arrow/schema.rs @@ -73,11 +73,11 @@ fn schema_to_field_with_props( AvroSchema::Bytes => DataType::Binary, AvroSchema::String => DataType::Utf8, AvroSchema::Array(item_schema) => DataType::List(Arc::new( - schema_to_field_with_props(item_schema, Some("element"), false, None)?, + schema_to_field_with_props(&item_schema.items, Some("element"), false, None)?, )), AvroSchema::Map(value_schema) => { let value_field = - schema_to_field_with_props(value_schema, Some("value"), false, None)?; + schema_to_field_with_props(&value_schema.types, Some("value"), false, None)?; DataType::Dictionary( Box::new(DataType::Utf8), Box::new(value_field.data_type().clone()), @@ -144,14 +144,17 @@ fn schema_to_field_with_props( AvroSchema::Decimal(DecimalSchema { precision, scale, .. }) => DataType::Decimal128(*precision as u8, *scale as i8), + AvroSchema::BigDecimal => DataType::LargeBinary, AvroSchema::Uuid => DataType::FixedSizeBinary(16), AvroSchema::Date => DataType::Date32, AvroSchema::TimeMillis => DataType::Time32(TimeUnit::Millisecond), AvroSchema::TimeMicros => DataType::Time64(TimeUnit::Microsecond), AvroSchema::TimestampMillis => DataType::Timestamp(TimeUnit::Millisecond, None), AvroSchema::TimestampMicros => DataType::Timestamp(TimeUnit::Microsecond, None), + AvroSchema::TimestampNanos => DataType::Timestamp(TimeUnit::Nanosecond, None), AvroSchema::LocalTimestampMillis => todo!(), AvroSchema::LocalTimestampMicros => todo!(), + AvroSchema::LocalTimestampNanos => todo!(), AvroSchema::Duration => DataType::Duration(TimeUnit::Millisecond), }; @@ -371,6 +374,7 @@ mod test { aliases: Some(vec![alias("foofixed"), alias("barfixed")]), size: 1, doc: None, + default: None, attributes: Default::default(), }); let props = external_props(&fixed_schema);