From f179188a8fbc71c45c7423c299890c3cfa384c25 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> Date: Mon, 6 Nov 2023 14:01:47 +0000 Subject: [PATCH 1/2] Fix serialization of large integers (#5038) (#5042) --- arrow-json/src/reader/mod.rs | 30 ++++++++++++++++++++++++ arrow-json/src/reader/primitive_array.rs | 2 +- arrow-json/src/reader/tape.rs | 2 +- arrow-json/src/reader/timestamp_array.rs | 4 ++-- 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index c1cef0ec81b4..1bd63dec81d6 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -2239,4 +2239,34 @@ mod tests { let values = b.column(0).as_primitive::().values(); assert_eq!(values, &[1, 2, 3, 4]); } + + #[test] + fn test_serde_large_numbers() { + let field = Field::new("int", DataType::Int64, true); + let mut decoder = ReaderBuilder::new_with_field(field) + .build_decoder() + .unwrap(); + + decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); + let b = decoder.flush().unwrap().unwrap(); + let values = b.column(0).as_primitive::<Int64Type>().values(); + assert_eq!(values, &[1699148028689, 2, 3, 4]); + + let field = Field::new( + "int", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + ); + let mut decoder = ReaderBuilder::new_with_field(field) + .build_decoder() + .unwrap(); + + decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap(); + let b = decoder.flush().unwrap().unwrap(); + let values = b + .column(0) + .as_primitive::<TimestampMicrosecondType>() + .values(); + assert_eq!(values, &[1699148028689, 2, 3, 4]); + } } diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs index 6cf0bac86737..daefab4bf725 100644 --- a/arrow-json/src/reader/primitive_array.rs +++ b/arrow-json/src/reader/primitive_array.rs @@ -143,7 +143,7 @@ where }, TapeElement::I64(high) => match tape.get(p + 1) { TapeElement::I32(low) => { - let v = (high as i64) << 32 | low 
as i64; + let v = (high as i64) << 32 | (low as u32) as i64; let value = NumCast::from(v).ok_or_else(|| { ArrowError::JsonError(format!("failed to parse {v} as {d}",)) })?; diff --git a/arrow-json/src/reader/tape.rs b/arrow-json/src/reader/tape.rs index b39caede7047..d7b6f26dd14f 100644 --- a/arrow-json/src/reader/tape.rs +++ b/arrow-json/src/reader/tape.rs @@ -180,7 +180,7 @@ impl<'a> Tape<'a> { TapeElement::Null => out.push_str("null"), TapeElement::I64(high) => match self.get(idx + 1) { TapeElement::I32(low) => { - let val = (high as i64) << 32 | low as i64; + let val = (high as i64) << 32 | (low as u32) as i64; let _ = write!(out, "{val}"); return idx + 2; } diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs index 09672614107c..333ca47a77da 100644 --- a/arrow-json/src/reader/timestamp_array.rs +++ b/arrow-json/src/reader/timestamp_array.rs @@ -99,8 +99,8 @@ where TapeElement::I32(v) => builder.append_value(v as i64), TapeElement::I64(high) => match tape.get(p + 1) { TapeElement::I32(low) => { - builder.append_value((high as i64) << 32 | low as i64) - } + builder.append_value((high as i64) << 32 | (low as u32) as i64) + } _ => unreachable!(), }, _ => return Err(tape.error(*p, "primitive")), From 96146b1818ea6a539a790d58aeee733f2d36c457 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 8 Nov 2023 15:41:17 -0500 Subject: [PATCH 2/2] fmt --- arrow-json/src/reader/timestamp_array.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs index 333ca47a77da..5da4868dd452 100644 --- a/arrow-json/src/reader/timestamp_array.rs +++ b/arrow-json/src/reader/timestamp_array.rs @@ -100,7 +100,7 @@ where TapeElement::I64(high) => match tape.get(p + 1) { TapeElement::I32(low) => { builder.append_value((high as i64) << 32 | (low as u32) as i64) - } + } _ => unreachable!(), }, _ => return Err(tape.error(*p, "primitive")),