diff --git a/arrow-array/src/numeric.rs b/arrow-array/src/numeric.rs index ad7b3eca1dbc..b5e474ba696a 100644 --- a/arrow-array/src/numeric.rs +++ b/arrow-array/src/numeric.rs @@ -618,7 +618,6 @@ mod tests { let mask = 0b01010101_01010101_10101010_10101010; let actual = UInt16Type::mask_from_u64(mask); let expected = expected_mask!(i16, mask); - dbg!(&expected); let expected = m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice())); assert_eq!(expected, actual); diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs index e69eaaba3ef8..e39882e52620 100644 --- a/arrow-json/src/lib.rs +++ b/arrow-json/src/lib.rs @@ -82,7 +82,7 @@ pub type RawReader = Reader; pub type RawReaderBuilder = ReaderBuilder; pub use self::reader::{Reader, ReaderBuilder}; -pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer}; +pub use self::writer::{ArrayWriter, LineDelimitedWriter, Writer, WriterBuilder}; use half::f16; use serde_json::{Number, Value}; diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs index 5ecfc932364b..4f74817ca1e3 100644 --- a/arrow-json/src/writer.rs +++ b/arrow-json/src/writer.rs @@ -92,6 +92,10 @@ //! let buf = writer.into_inner(); //! assert_eq!(r#"[{"a":1},{"a":2},{"a":3}]"#, String::from_utf8(buf).unwrap()) //! ``` +//! +//! [`LineDelimitedWriter`] and [`ArrayWriter`] will omit writing keys with null values. +//! In order to explicitly write null values for keys, configure a custom [`Writer`] by +//! using a [`WriterBuilder`] to construct a [`Writer`]. use std::iter; use std::{fmt::Debug, io::Write}; @@ -124,6 +128,7 @@ where fn struct_array_to_jsonmap_array( array: &StructArray, + explicit_nulls: bool, ) -> Result>, ArrowError> { let inner_col_names = array.column_names(); @@ -132,13 +137,26 @@ fn struct_array_to_jsonmap_array( .collect::>>(); for (j, struct_col) in array.columns().iter().enumerate() { - set_column_for_json_rows(&mut inner_objs, struct_col, inner_col_names[j])? + set_column_for_json_rows( + &mut inner_objs, + struct_col, + inner_col_names[j], + explicit_nulls, + )? } Ok(inner_objs) } /// Converts an arrow [`Array`] into a `Vec` of Serde JSON [`serde_json::Value`]'s pub fn array_to_json_array(array: &dyn Array) -> Result, ArrowError> { + // For backwards compatibility, default to skip nulls + array_to_json_array_internal(array, false) +} + +fn array_to_json_array_internal( + array: &dyn Array, + explicit_nulls: bool, +) -> Result, ArrowError> { match array.data_type() { DataType::Null => Ok(iter::repeat(Value::Null).take(array.len()).collect()), DataType::Boolean => Ok(array @@ -180,32 +198,44 @@ pub fn array_to_json_array(array: &dyn Array) -> Result, ArrowError> DataType::List(_) => as_list_array(array) .iter() .map(|maybe_value| match maybe_value { - Some(v) => Ok(Value::Array(array_to_json_array(&v)?)), + Some(v) => Ok(Value::Array(array_to_json_array_internal( + &v, + explicit_nulls, + )?)), None => Ok(Value::Null), }) .collect(), DataType::LargeList(_) => as_large_list_array(array) .iter() .map(|maybe_value| match maybe_value { - Some(v) => Ok(Value::Array(array_to_json_array(&v)?)), + Some(v) => Ok(Value::Array(array_to_json_array_internal( + &v, + explicit_nulls, + )?)), None => Ok(Value::Null), }) .collect(), DataType::FixedSizeList(_, _) => as_fixed_size_list_array(array) .iter() .map(|maybe_value| match maybe_value { - Some(v) => Ok(Value::Array(array_to_json_array(&v)?)), + Some(v) => Ok(Value::Array(array_to_json_array_internal( + &v, + explicit_nulls, + )?)), None => Ok(Value::Null), }) .collect(), DataType::Struct(_) => { - let jsonmaps = struct_array_to_jsonmap_array(array.as_struct())?; + let jsonmaps = struct_array_to_jsonmap_array(array.as_struct(), explicit_nulls)?; Ok(jsonmaps.into_iter().map(Value::Object).collect()) } DataType::Map(_, _) => as_map_array(array) .iter() .map(|maybe_value| match maybe_value { - Some(v) => Ok(Value::Array(array_to_json_array(&v)?)), + Some(v) => Ok(Value::Array(array_to_json_array_internal( + &v, + explicit_nulls, + )?)), None => Ok(Value::Null), }) .collect(), @@ -216,14 +246,16 @@ pub fn array_to_json_array(array: &dyn Array) -> Result, ArrowError> } macro_rules! set_column_by_array_type { - ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident) => { + ($cast_fn:ident, $col_name:ident, $rows:ident, $array:ident, $explicit_nulls:ident) => { let arr = $cast_fn($array); $rows .iter_mut() .zip(arr.iter()) .for_each(|(row, maybe_value)| { - if let Some(v) = maybe_value { - row.insert($col_name.to_string(), v.into()); + if let Some(j) = maybe_value.map(Into::into) { + row.insert($col_name.to_string(), j); + } else if $explicit_nulls { + row.insert($col_name.to_string(), Value::Null); } }); }; @@ -233,6 +265,7 @@ fn set_column_by_primitive_type( rows: &mut [JsonMap], array: &ArrayRef, col_name: &str, + explicit_nulls: bool, ) where T: ArrowPrimitiveType, T::Native: JsonSerializable, @@ -242,9 +275,10 @@ fn set_column_by_primitive_type( rows.iter_mut() .zip(primitive_arr.iter()) .for_each(|(row, maybe_value)| { - // when value is null, we simply skip setting the key if let Some(j) = maybe_value.and_then(|v| v.into_json_value()) { row.insert(col_name.to_string(), j); + } else if explicit_nulls { + row.insert(col_name.to_string(), Value::Null); } }); } @@ -253,52 +287,57 @@ fn set_column_for_json_rows( rows: &mut [JsonMap], array: &ArrayRef, col_name: &str, + explicit_nulls: bool, ) -> Result<(), ArrowError> { match array.data_type() { DataType::Int8 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Int16 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Int32 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Int64 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::UInt8 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::UInt16 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::UInt32 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::UInt64 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Float16 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Float32 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Float64 => { - set_column_by_primitive_type::(rows, array, col_name); + set_column_by_primitive_type::(rows, array, col_name, explicit_nulls); } DataType::Null => { - // when value is null, we simply skip setting the key + if explicit_nulls { + rows.iter_mut().for_each(|row| { + row.insert(col_name.to_string(), Value::Null); + }); + } } DataType::Boolean => { - set_column_by_array_type!(as_boolean_array, col_name, rows, array); + set_column_by_array_type!(as_boolean_array, col_name, rows, array, explicit_nulls); } DataType::Utf8 => { - set_column_by_array_type!(as_string_array, col_name, rows, array); + set_column_by_array_type!(as_string_array, col_name, rows, array, explicit_nulls); } DataType::LargeUtf8 => { - set_column_by_array_type!(as_largestring_array, col_name, rows, array); + set_column_by_array_type!(as_largestring_array, col_name, rows, array, explicit_nulls); } DataType::Date32 | DataType::Date64 @@ -310,16 +349,19 @@ fn set_column_for_json_rows( let formatter = ArrayFormatter::try_new(array.as_ref(), &options)?; let nulls = array.nulls(); rows.iter_mut().enumerate().for_each(|(idx, row)| { - if nulls.map(|x| x.is_valid(idx)).unwrap_or(true) { - row.insert( - col_name.to_string(), - formatter.value(idx).to_string().into(), - ); - } + let maybe_value = nulls + .map(|x| x.is_valid(idx)) + .unwrap_or(true) + .then(|| formatter.value(idx).to_string().into()); + if let Some(j) = maybe_value { + row.insert(col_name.to_string(), j); + } else if explicit_nulls { + row.insert(col_name.to_string(), Value::Null); + }; }); } DataType::Struct(_) => { - let inner_objs = struct_array_to_jsonmap_array(array.as_struct())?; + let inner_objs = struct_array_to_jsonmap_array(array.as_struct(), explicit_nulls)?; rows.iter_mut().zip(inner_objs).for_each(|(row, obj)| { row.insert(col_name.to_string(), Value::Object(obj)); }); @@ -328,8 +370,13 @@ fn set_column_for_json_rows( let listarr = as_list_array(array); rows.iter_mut().zip(listarr.iter()).try_for_each( |(row, maybe_value)| -> Result<(), ArrowError> { - if let Some(v) = maybe_value { - row.insert(col_name.to_string(), Value::Array(array_to_json_array(&v)?)); + let maybe_value = maybe_value + .map(|v| array_to_json_array_internal(&v, explicit_nulls).map(Value::Array)) + .transpose()?; + if let Some(j) = maybe_value { + row.insert(col_name.to_string(), j); + } else if explicit_nulls { + row.insert(col_name.to_string(), Value::Null); } Ok(()) }, @@ -339,9 +386,13 @@ fn set_column_for_json_rows( let listarr = as_large_list_array(array); rows.iter_mut().zip(listarr.iter()).try_for_each( |(row, maybe_value)| -> Result<(), ArrowError> { - if let Some(v) = maybe_value { - let val = array_to_json_array(&v)?; - row.insert(col_name.to_string(), Value::Array(val)); + let maybe_value = maybe_value + .map(|v| array_to_json_array_internal(&v, explicit_nulls).map(Value::Array)) + .transpose()?; + if let Some(j) = maybe_value { + row.insert(col_name.to_string(), j); + } else if explicit_nulls { + row.insert(col_name.to_string(), Value::Null); } Ok(()) }, @@ -350,7 +401,7 @@ fn set_column_for_json_rows( DataType::Dictionary(_, value_type) => { let hydrated = arrow_cast::cast::cast(&array, value_type) .expect("cannot cast dictionary to underlying values"); - set_column_for_json_rows(rows, &hydrated, col_name)?; + set_column_for_json_rows(rows, &hydrated, col_name, explicit_nulls)?; } DataType::Map(_, _) => { let maparr = as_map_array(array); @@ -367,7 +418,7 @@ fn set_column_for_json_rows( } let keys = keys.as_string::(); - let values = array_to_json_array(values)?; + let values = array_to_json_array_internal(values, explicit_nulls)?; let mut kv = keys.iter().zip(values); @@ -401,6 +452,14 @@ fn set_column_for_json_rows( /// [`JsonMap`]s (objects) pub fn record_batches_to_json_rows( batches: &[&RecordBatch], +) -> Result>, ArrowError> { + // For backwards compatibility, default to skip nulls + record_batches_to_json_rows_internal(batches, false) +} + +fn record_batches_to_json_rows_internal( + batches: &[&RecordBatch], + explicit_nulls: bool, ) -> Result>, ArrowError> { let mut rows: Vec> = iter::repeat(JsonMap::new()) .take(batches.iter().map(|b| b.num_rows()).sum()) @@ -414,7 +473,7 @@ pub fn record_batches_to_json_rows( let row_slice = &mut rows[base..base + batch.num_rows()]; for (j, col) in batch.columns().iter().enumerate() { let col_name = schema.field(j).name(); - set_column_for_json_rows(row_slice, col, col_name)? + set_column_for_json_rows(row_slice, col, col_name, explicit_nulls)? } base += row_count; } @@ -450,7 +509,9 @@ pub trait JsonFormat: Debug + Default { } } -/// Produces JSON output with one record per line. For example +/// Produces JSON output with one record per line. +/// +/// For example: /// /// ```json /// {"foo":1} @@ -467,7 +528,9 @@ impl JsonFormat for LineDelimited { } } -/// Produces JSON output as a single JSON array. For example +/// Produces JSON output as a single JSON array. +/// +/// For example: /// /// ```json /// [{"foo":1},{"bar":1}] @@ -494,16 +557,101 @@ impl JsonFormat for JsonArray { } } -/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON objects +/// A JSON writer which serializes [`RecordBatch`]es to newline delimited JSON objects. pub type LineDelimitedWriter = Writer; -/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays +/// A JSON writer which serializes [`RecordBatch`]es to JSON arrays. pub type ArrayWriter = Writer; +/// JSON writer builder. +#[derive(Debug, Clone, Default)] +pub struct WriterBuilder { + /// Controls whether null values should be written explicitly for keys + /// in objects, or whether the key should be omitted entirely. + explicit_nulls: bool, +} + +impl WriterBuilder { + /// Create a new builder for configuring JSON writing options. + /// + /// # Example + /// + /// ``` + /// # use arrow_json::{Writer, WriterBuilder}; + /// # use arrow_json::writer::LineDelimited; + /// # use std::fs::File; + /// + /// fn example() -> Writer { + /// let file = File::create("target/out.json").unwrap(); + /// + /// // create a builder that keeps keys with null values + /// let builder = WriterBuilder::new().with_explicit_nulls(true); + /// let writer = builder.build::<_, LineDelimited>(file); + /// + /// writer + /// } + /// ``` + pub fn new() -> Self { + Self::default() + } + + /// Returns `true` if this writer is configured to keep keys with null values. + pub fn explicit_nulls(&self) -> bool { + self.explicit_nulls + } + + /// Set whether to keep keys with null values, or to omit writing them. + /// + /// For example, with [`LineDelimited`] format: + /// + /// Skip nulls (set to `false`): + /// + /// ```json + /// {"foo":1} + /// {"foo":1,"bar":2} + /// {} + /// ``` + /// + /// Keep nulls (set to `true`): + /// + /// ```json + /// {"foo":1,"bar":null} + /// {"foo":1,"bar":2} + /// {"foo":null,"bar":null} + /// ``` + /// + /// Default is to skip nulls (set to `false`). + pub fn with_explicit_nulls(mut self, explicit_nulls: bool) -> Self { + self.explicit_nulls = explicit_nulls; + self + } + + /// Create a new `Writer` with specified `JsonFormat` and builder options. + pub fn build(self, writer: W) -> Writer + where + W: Write, + F: JsonFormat, + { + Writer { + writer, + started: false, + finished: false, + format: F::default(), + explicit_nulls: self.explicit_nulls, + } + } +} + /// A JSON writer which serializes [`RecordBatch`]es to a stream of -/// `u8` encoded JSON objects. See the module level documentation for -/// detailed usage and examples. The specific format of the stream is -/// controlled by the [`JsonFormat`] type parameter. +/// `u8` encoded JSON objects. +/// +/// See the module level documentation for detailed usage and examples. +/// The specific format of the stream is controlled by the [`JsonFormat`] +/// type parameter. +/// +/// By default the writer will skip writing keys with null values for +/// backward compatibility. See [`WriterBuilder`] on how to customize +/// this behaviour when creating a new writer. #[derive(Debug)] pub struct Writer where @@ -521,6 +669,9 @@ where /// Determines how the byte stream is formatted format: F, + + /// Whether keys with null values should be written or skipped + explicit_nulls: bool, } impl Writer @@ -535,6 +686,7 @@ where started: false, finished: false, format: F::default(), + explicit_nulls: false, } } @@ -556,7 +708,7 @@ where /// Convert the `RecordBatch` into JSON rows, and write them to the output pub fn write(&mut self, batch: &RecordBatch) -> Result<(), ArrowError> { - for row in record_batches_to_json_rows(&[batch])? { + for row in record_batches_to_json_rows_internal(&[batch], self.explicit_nulls)? { self.write_row(&Value::Object(row))?; } Ok(()) @@ -564,7 +716,7 @@ where /// Convert the [`RecordBatch`] into JSON rows, and write them to the output pub fn write_batches(&mut self, batches: &[&RecordBatch]) -> Result<(), ArrowError> { - for row in record_batches_to_json_rows(batches)? { + for row in record_batches_to_json_rows_internal(batches, self.explicit_nulls)? { self.write_row(&Value::Object(row))?; } Ok(()) @@ -609,7 +761,7 @@ mod tests { use serde_json::json; - use arrow_array::builder::{Int32Builder, MapBuilder, StringBuilder}; + use arrow_array::builder::{Int32Builder, Int64Builder, MapBuilder, StringBuilder}; use arrow_buffer::{Buffer, ToByteSlice}; use arrow_data::ArrayData; @@ -1203,7 +1355,7 @@ mod tests { ); } - fn test_write_for_file(test_file: &str) { + fn test_write_for_file(test_file: &str, remove_nulls: bool) { let file = File::open(test_file).unwrap(); let mut reader = BufReader::new(file); let (schema, _) = infer_json_schema(&mut reader, None).unwrap(); @@ -1215,18 +1367,27 @@ mod tests { let mut buf = Vec::new(); { - let mut writer = LineDelimitedWriter::new(&mut buf); - writer.write_batches(&[&batch]).unwrap(); + if remove_nulls { + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write_batches(&[&batch]).unwrap(); + } else { + let mut writer = WriterBuilder::new() + .with_explicit_nulls(true) + .build::<_, LineDelimited>(&mut buf); + writer.write_batches(&[&batch]).unwrap(); + } } let result = String::from_utf8(buf).unwrap(); let expected = read_to_string(test_file).unwrap(); for (r, e) in result.lines().zip(expected.lines()) { let mut expected_json = serde_json::from_str::(e).unwrap(); - // remove null value from object to make comparison consistent: - if let Value::Object(obj) = expected_json { - expected_json = - Value::Object(obj.into_iter().filter(|(_, v)| *v != Value::Null).collect()); + if remove_nulls { + // remove null value from object to make comparison consistent: + if let Value::Object(obj) = expected_json { + expected_json = + Value::Object(obj.into_iter().filter(|(_, v)| *v != Value::Null).collect()); + } } assert_eq!(serde_json::from_str::(r).unwrap(), expected_json,); } @@ -1234,17 +1395,22 @@ mod tests { #[test] fn write_basic_rows() { - test_write_for_file("test/data/basic.json"); + test_write_for_file("test/data/basic.json", true); } #[test] fn write_arrays() { - test_write_for_file("test/data/arrays.json"); + test_write_for_file("test/data/arrays.json", true); } #[test] fn write_basic_nulls() { - test_write_for_file("test/data/basic_nulls.json"); + test_write_for_file("test/data/basic_nulls.json", true); + } + + #[test] + fn write_nested_with_nulls() { + test_write_for_file("test/data/nested_with_nulls.json", false); } #[test] @@ -1530,4 +1696,233 @@ mod tests { assert_eq!(array_to_json_array(&map_array).unwrap(), expected_json); } + + #[test] + fn test_writer_explicit_nulls() -> Result<(), ArrowError> { + fn nested_list() -> (Arc, Arc) { + let array = Arc::new(ListArray::from_iter_primitive::(vec![ + Some(vec![None, None, None]), + Some(vec![Some(1), Some(2), Some(3)]), + None, + Some(vec![None, None, None]), + ])); + let field = Arc::new(Field::new("list", array.data_type().clone(), true)); + // [{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}] + (array, field) + } + + fn nested_dict() -> (Arc>, Arc) { + let array = Arc::new(DictionaryArray::from_iter(vec![ + Some("cupcakes"), + None, + Some("bear"), + Some("kuma"), + ])); + let field = Arc::new(Field::new("dict", array.data_type().clone(), true)); + // [{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}] + (array, field) + } + + fn nested_map() -> (Arc, Arc) { + let string_builder = StringBuilder::new(); + let int_builder = Int64Builder::new(); + let mut builder = MapBuilder::new(None, string_builder, int_builder); + + // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}] + builder.keys().append_value("foo"); + builder.values().append_value(10); + builder.append(true).unwrap(); + + builder.append(false).unwrap(); + + builder.append(true).unwrap(); + + builder.keys().append_value("bar"); + builder.values().append_value(20); + builder.keys().append_value("baz"); + builder.values().append_value(30); + builder.keys().append_value("qux"); + builder.values().append_value(40); + builder.append(true).unwrap(); + + let array = Arc::new(builder.finish()); + let field = Arc::new(Field::new("map", array.data_type().clone(), true)); + (array, field) + } + + fn root_list() -> (Arc, Field) { + let struct_array = StructArray::from(vec![ + ( + Arc::new(Field::new("utf8", DataType::Utf8, true)), + Arc::new(StringArray::from(vec![Some("a"), Some("b"), None, None])) as ArrayRef, + ), + ( + Arc::new(Field::new("int32", DataType::Int32, true)), + Arc::new(Int32Array::from(vec![Some(1), None, Some(5), None])) as ArrayRef, + ), + ]); + + let field = Field::new_list( + "list", + Field::new("struct", struct_array.data_type().clone(), true), + true, + ); + + // [{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{int32":5,"utf8":null}]},{"list":null}] + let entry_offsets = Buffer::from(&[0, 2, 2, 3, 3].to_byte_slice()); + let data = ArrayData::builder(field.data_type().clone()) + .len(4) + .add_buffer(entry_offsets) + .add_child_data(struct_array.into_data()) + .null_bit_buffer(Some([0b00000101].into())) + .build() + .unwrap(); + let array = Arc::new(ListArray::from(data)); + (array, field) + } + + let (nested_list_array, nested_list_field) = nested_list(); + let (nested_dict_array, nested_dict_field) = nested_dict(); + let (nested_map_array, nested_map_field) = nested_map(); + let (root_list_array, root_list_field) = root_list(); + + let schema = Schema::new(vec![ + Field::new("date", DataType::Date32, true), + Field::new("null", DataType::Null, true), + Field::new_struct( + "struct", + vec![ + Arc::new(Field::new("utf8", DataType::Utf8, true)), + nested_list_field.clone(), + nested_dict_field.clone(), + nested_map_field.clone(), + ], + true, + ), + root_list_field, + ]); + + let arr_date32 = Date32Array::from(vec![Some(0), None, Some(1), None]); + let arr_null = NullArray::new(4); + let arr_struct = StructArray::from(vec![ + // [{"utf8":"a"},{"utf8":null},{"utf8":null},{"utf8":"b"}] + ( + Arc::new(Field::new("utf8", DataType::Utf8, true)), + Arc::new(StringArray::from(vec![Some("a"), None, None, Some("b")])) as ArrayRef, + ), + // [{"list":[null,null,null]},{"list":[1,2,3]},{"list":null},{"list":[null,null,null]}] + (nested_list_field, nested_list_array as ArrayRef), + // [{"dict":"cupcakes"},{"dict":null},{"dict":"bear"},{"dict":"kuma"}] + (nested_dict_field, nested_dict_array as ArrayRef), + // [{"foo": 10}, null, {}, {"bar": 20, "baz": 30, "qux": 40}] + (nested_map_field, nested_map_array as ArrayRef), + ]); + + let batch = RecordBatch::try_new( + Arc::new(schema), + vec![ + // [{"date":"1970-01-01"},{"date":null},{"date":"1970-01-02"},{"date":null}] + Arc::new(arr_date32), + // [{"null":null},{"null":null},{"null":null},{"null":null}] + Arc::new(arr_null), + Arc::new(arr_struct), + // [{"list":[{"int32":1,"utf8":"a"},{"int32":null,"utf8":"b"}]},{"list":null},{"list":[{int32":5,"utf8":null}]},{"list":null}] + root_list_array, + ], + )?; + + let mut buf = Vec::new(); + { + let mut writer = WriterBuilder::new() + .with_explicit_nulls(true) + .build::<_, JsonArray>(&mut buf); + writer.write_batches(&[&batch])?; + writer.finish()?; + } + + let actual = serde_json::from_slice::>(&buf).unwrap(); + let expected = serde_json::from_value::>(json!([ + { + "date": "1970-01-01", + "list": [ + { + "int32": 1, + "utf8": "a" + }, + { + "int32": null, + "utf8": "b" + } + ], + "null": null, + "struct": { + "dict": "cupcakes", + "list": [ + null, + null, + null + ], + "map": { + "foo": 10 + }, + "utf8": "a" + } + }, + { + "date": null, + "list": null, + "null": null, + "struct": { + "dict": null, + "list": [ + 1, + 2, + 3 + ], + "map": null, + "utf8": null + } + }, + { + "date": "1970-01-02", + "list": [ + { + "int32": 5, + "utf8": null + } + ], + "null": null, + "struct": { + "dict": "bear", + "list": null, + "map": {}, + "utf8": null + } + }, + { + "date": null, + "list": null, + "null": null, + "struct": { + "dict": "kuma", + "list": [ + null, + null, + null + ], + "map": { + "bar": 20, + "baz": 30, + "qux": 40 + }, + "utf8": "b" + } + } + ])) + .unwrap(); + + assert_eq!(actual, expected); + + Ok(()) + } } diff --git a/arrow-json/test/data/nested_with_nulls.json b/arrow-json/test/data/nested_with_nulls.json new file mode 100644 index 000000000000..932565d56063 --- /dev/null +++ b/arrow-json/test/data/nested_with_nulls.json @@ -0,0 +1,4 @@ +{"a": null, "b": null, "c": null, "d": {"d1": null, "d2": [null, 1, 2, null]}} +{"a": null, "b": -3.5, "c": true, "d": {"d1": null, "d2": null}} +{"a": null, "b": null, "c": false, "d": {"d1": "1970-01-01", "d2": null}} +{"a": 1, "b": 2.0, "c": false, "d": {"d1": null, "d2": null}} diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 31388bf99358..b49f56c91574 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -664,8 +664,6 @@ mod tests { .downcast_ref::>() .unwrap(); - dbg!(&array); - // verify let expected = GenericListArray::::from(list_data); assert_eq!(&array.value(0), &expected.value(0)); diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs index bfe16db5cc4d..c73f4f50ac01 100644 --- a/arrow/tests/array_cast.rs +++ b/arrow/tests/array_cast.rs @@ -47,7 +47,6 @@ fn test_cast_timestamp_to_string() { let a = TimestampMillisecondArray::from(vec![Some(864000000005), Some(1545696000001), None]) .with_timezone("UTC".to_string()); let array = Arc::new(a) as ArrayRef; - dbg!(&array); let b = cast(&array, &DataType::Utf8).unwrap(); let c = b.as_any().downcast_ref::().unwrap(); assert_eq!(&DataType::Utf8, c.data_type()); diff --git a/object_store/src/gcp/builder.rs b/object_store/src/gcp/builder.rs index 5f718d63d94a..7417ea4c8a50 100644 --- a/object_store/src/gcp/builder.rs +++ b/object_store/src/gcp/builder.rs @@ -605,7 +605,7 @@ mod tests { .with_bucket_name("foo") .with_proxy_url("https://example.com") .build(); - assert!(dbg!(gcs).is_ok()); + assert!(gcs.is_ok()); let err = GoogleCloudStorageBuilder::new() .with_service_account_path(service_account_path.to_str().unwrap())