From d18283061ce262c5cd6f34312a042a349f6094a3 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 13:41:20 -0400 Subject: [PATCH] addr comments --- arrow-row/src/variable.rs | 39 +++++++++++-------------------------- arrow/benches/row_format.rs | 4 ++-- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/arrow-row/src/variable.rs b/arrow-row/src/variable.rs index 1b544fef7f43..4d4bcddc0807 100644 --- a/arrow-row/src/variable.rs +++ b/arrow-row/src/variable.rs @@ -20,7 +20,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::*; use arrow_buffer::bit_util::ceil; use arrow_buffer::MutableBuffer; -use arrow_data::{ArrayDataBuilder, ByteView}; +use arrow_data::ArrayDataBuilder; use arrow_schema::{DataType, SortOptions}; use builder::make_view; @@ -153,6 +153,8 @@ fn encode_blocks(out: &mut [u8], val: &[u8]) -> usize { end_offset } +/// Decodes a single block of data +/// The `f` function accepts a slice of the decoded data, it may be called multiple times pub fn decode_blocks(row: &[u8], options: SortOptions, mut f: impl FnMut(&[u8])) -> usize { let (non_empty_sentinel, continuation) = match options.descending { true => (!NON_EMPTY_SENTINEL, !BLOCK_CONTINUATION), @@ -271,36 +273,17 @@ fn decode_binary_view_inner( debug_assert_eq!(start_offset, values.len()); views.append(0); } else { - let view = make_view( - unsafe { values.get_unchecked(start_offset..) }, - 0, - start_offset as u32, - ); - views.append(view); - } - *row = &row[offset..]; - } + // Safety: we just appended the data to the end of the buffer + let val = unsafe { values.get_unchecked_mut(start_offset..) }; - if options.descending { - values.as_slice_mut().iter_mut().for_each(|o| *o = !*o); - for view in views.as_slice_mut() { - let len = *view as u32; - if len <= 12 { - let mut bytes = view.to_le_bytes(); - bytes - .iter_mut() - .skip(4) - .take(len as usize) - .for_each(|o| *o = !*o); - *view = u128::from_le_bytes(bytes); - } else { - let mut byte_view = ByteView::from(*view); - let mut prefix = byte_view.prefix.to_le_bytes(); - prefix.iter_mut().for_each(|o| *o = !*o); - byte_view.prefix = u32::from_le_bytes(prefix); - *view = byte_view.into(); + if options.descending { + val.iter_mut().for_each(|o| *o = !*o); } + + let view = make_view(val, 0, start_offset as u32); + views.append(view); } + *row = &row[offset..]; } if check_utf8 { diff --git a/arrow/benches/row_format.rs b/arrow/benches/row_format.rs index 7ef4a634203e..0fb63b5b3240 100644 --- a/arrow/benches/row_format.rs +++ b/arrow/benches/row_format.rs @@ -93,8 +93,8 @@ fn row_bench(c: &mut Criterion) { let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 30, false)) as ArrayRef]; do_bench(c, "4096 string view(30, 0)", cols); - let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0., 100, false)) as ArrayRef]; - do_bench(c, "4096 string view(100, 0)", cols); + let cols = vec![Arc::new(create_string_view_array_with_len(40960, 0., 100, false)) as ArrayRef]; + do_bench(c, "40960 string view(100, 0)", cols); let cols = vec![Arc::new(create_string_view_array_with_len(4096, 0.5, 100, false)) as ArrayRef]; do_bench(c, "4096 string view(100, 0.5)", cols);