From 1a880d632e96179d10f2bcf06b573a8faa21b7c8 Mon Sep 17 00:00:00 2001
From: Vrishabh
Date: Sun, 18 Feb 2024 16:51:03 +0530
Subject: [PATCH] Improve float to string cast by ~20%-40% (#5401)

* Add cast f64 to string to benchmark

* Improve float to string performance using ryu
---
 arrow-cast/Cargo.toml         |  1 +
 arrow-cast/src/display.rs     | 16 +++++++++++++++-
 arrow/benches/cast_kernels.rs |  4 +++-
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 81dd0ebd415f..2d8a57aba5f5 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -52,6 +52,7 @@ lexical-core = { version = "^0.8", default-features = false, features = ["write-
 atoi = "2.0.0"
 comfy-table = { version = "7.0", optional = true, default-features = false }
 base64 = "0.21"
+ryu = "1.0.16"
 
 [dev-dependencies]
 criterion = { version = "0.5", default-features = false }
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index d74128a20ba3..9ec12f6e63d2 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -427,9 +427,23 @@ macro_rules! primitive_display {
     };
 }
 
+macro_rules! primitive_display_float {
+    ($($t:ty),+) => {
+        $(impl<'a> DisplayIndex for &'a PrimitiveArray<$t>
+        {
+            fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
+                let value = self.value(idx);
+                let mut buffer = ryu::Buffer::new();
+                f.write_str(buffer.format(value))?;
+                Ok(())
+            }
+        })+
+    };
+}
+
 primitive_display!(Int8Type, Int16Type, Int32Type, Int64Type);
 primitive_display!(UInt8Type, UInt16Type, UInt32Type, UInt64Type);
-primitive_display!(Float32Type, Float64Type);
+primitive_display_float!(Float32Type, Float64Type);
 
 impl<'a> DisplayIndex for &'a PrimitiveArray<Float16Type> {
     fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 933ddd4a06b4..6632dbc57c56 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -207,7 +207,9 @@ fn add_benchmark(c: &mut Criterion) {
     c.bench_function("cast f32 to string 512", |b| {
         b.iter(|| cast_array(&f32_array, DataType::Utf8))
     });
-
+    c.bench_function("cast f64 to string 512", |b| {
+        b.iter(|| cast_array(&f64_array, DataType::Utf8))
+    });
     c.bench_function("cast timestamp_ms to i64 512", |b| {
         b.iter(|| cast_array(&time_ms_array, DataType::Int64))
     });