From 9efbe1b73b343ef3afd99c805abf891bf7ad9b52 Mon Sep 17 00:00:00 2001
From: Kamil Skalski
Date: Sun, 17 Dec 2023 09:49:03 +0700
Subject: [PATCH] Fix inferring object after field was null.

---
 arrow-json/src/reader/schema.rs | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs
index 97f1a0f29594..ace7b0ea5cb6 100644
--- a/arrow-json/src/reader/schema.rs
+++ b/arrow-json/src/reader/schema.rs
@@ -455,7 +455,7 @@ fn collect_field_types_from_object(
                 set_object_scalar_field_type(field_types, k, DataType::Utf8)?;
             }
             Value::Object(inner_map) => {
-                if !field_types.contains_key(k) {
+                if let InferredType::Any = field_types.get(k).unwrap_or(&InferredType::Any) {
                     field_types.insert(k.to_string(), InferredType::Object(HashMap::new()));
                 }
                 match field_types.get_mut(k).unwrap() {
@@ -719,4 +719,24 @@ mod tests {
         ]);
         assert_eq!(inferred_schema, schema);
     }
+
+    #[test]
+    fn test_infer_from_null_then_object() {
+        let data = r#"
+        {"obj":null}
+        {"obj":{"foo":1}}
+        "#;
+        let (inferred_schema, _) =
+            infer_json_schema_from_seekable(Cursor::new(data), None).expect("infer");
+        let schema = Schema::new(vec![Field::new(
+            "obj",
+            DataType::Struct(
+                [Field::new("foo", DataType::Int64, true)]
+                    .into_iter()
+                    .collect(),
+            ),
+            true,
+        )]);
+        assert_eq!(inferred_schema, schema);
+    }
 }