From 5bf9b64a24d2d8ae2af2f967bc2c70544f7f5b9e Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Wed, 12 Jun 2024 18:09:42 -0400 Subject: [PATCH 1/2] avoid copy/allocation when build from offset buffer --- parquet/src/arrow/buffer/offset_buffer.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs index 32bb9d0862b9..89484181d0cc 100644 --- a/parquet/src/arrow/buffer/offset_buffer.rs +++ b/parquet/src/arrow/buffer/offset_buffer.rs @@ -156,16 +156,19 @@ impl OffsetBuffer { fn build_generic_byte_view(self) -> GenericByteViewBuilder { let mut builder = GenericByteViewBuilder::::with_capacity(self.len()); - let mut values = self.values; + let buffer = self.values.into(); + builder.append_block(buffer); for window in self.offsets.windows(2) { let start = window[0]; let end = window[1]; let len = (end - start).to_usize().unwrap(); - let b = values.drain(..len).collect::>(); - if b.is_empty() { - builder.append_null(); + + if len != 0 { + builder + .try_append_view(0, start.as_usize() as u32, len as u32) + .unwrap(); } else { - builder.append_value(b); + builder.append_null(); } } builder From 4669ae4cf24f860acb855ca67bad564399d03847 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Thu, 13 Jun 2024 00:01:07 -0400 Subject: [PATCH 2/2] avoid hard code block id --- parquet/src/arrow/buffer/offset_buffer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs index 89484181d0cc..181e69c669a4 100644 --- a/parquet/src/arrow/buffer/offset_buffer.rs +++ b/parquet/src/arrow/buffer/offset_buffer.rs @@ -157,7 +157,7 @@ impl OffsetBuffer { fn build_generic_byte_view(self) -> GenericByteViewBuilder { let mut builder = GenericByteViewBuilder::::with_capacity(self.len()); let buffer = self.values.into(); - builder.append_block(buffer); + let block = builder.append_block(buffer); for window in self.offsets.windows(2) { let start = window[0]; let end = window[1]; @@ -165,7 +165,7 @@ impl OffsetBuffer { if len != 0 { builder - .try_append_view(0, start.as_usize() as u32, len as u32) + .try_append_view(block, start.as_usize() as u32, len as u32) .unwrap(); } else { builder.append_null();