Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: apache/arrow-rs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 7729b200d0fb715e3cfc02f7291b6defbea3c890
Choose a base ref
..
head repository: apache/arrow-rs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 9b8b42b522a80c0f1cca9f80d209b48b53b7b849
Choose a head ref
Showing with 115 additions and 14 deletions.
  1. +1 −1 arrow-array/src/builder/generic_list_builder.rs
  2. +0 −12 arrow-buffer/src/alloc/alignment.rs
  3. +110 −0 arrow-cast/src/cast/mod.rs
  4. +2 −1 parquet/src/format.rs
  5. +2 −0 parquet/src/lib.rs
2 changes: 1 addition & 1 deletion arrow-array/src/builder/generic_list_builder.rs
Original file line number Diff line number Diff line change
@@ -548,7 +548,7 @@ mod tests {
}

#[test]
fn test_boxed_primitive_aray_builder() {
fn test_boxed_primitive_array_builder() {
let values_builder = make_builder(&DataType::Int32, 5);
let mut builder = ListBuilder::new(values_builder);

12 changes: 0 additions & 12 deletions arrow-buffer/src/alloc/alignment.rs
Original file line number Diff line number Diff line change
@@ -80,15 +80,6 @@ pub const ALIGNMENT: usize = 1 << 5;
#[cfg(target_arch = "sparc64")]
pub const ALIGNMENT: usize = 1 << 6;

// On ARM, cache line sizes are fixed for both v6 and v7.
// Board-specific or platform-specific values need to be added later.
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "thumbv6")]
pub const ALIGNMENT: usize = 1 << 5;
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "thumbv7")]
pub const ALIGNMENT: usize = 1 << 5;

// Operating Systems cache size determines this.
// Currently no way to determine this without runtime inference.
/// Cache and allocation multiple alignment size
@@ -107,9 +98,6 @@ pub const ALIGNMENT: usize = 1 << 5;
// If you have smaller data with less padded functionality then use 32 with force option.
// - https://devtalk.nvidia.com/default/topic/803600/variable-cache-line-width-/
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "nvptx")]
pub const ALIGNMENT: usize = 1 << 7;
/// Cache and allocation multiple alignment size
#[cfg(target_arch = "nvptx64")]
pub const ALIGNMENT: usize = 1 << 7;

110 changes: 110 additions & 0 deletions arrow-cast/src/cast/mod.rs
Original file line number Diff line number Diff line change
@@ -218,6 +218,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
| Interval(_),
) => true,
(Utf8 | LargeUtf8, Utf8View) => true,
(Utf8View, Utf8 | LargeUtf8) => true,
(BinaryView, Binary | LargeBinary) => true,
(Utf8 | LargeUtf8, _) => to_type.is_numeric() && to_type != &Float16,
(_, Utf8 | LargeUtf8) => from_type.is_primitive(),

@@ -1262,6 +1264,12 @@ pub fn cast_with_options(
"Casting from {from_type:?} to {to_type:?} not supported",
))),
},
(Utf8View, Utf8) => cast_view_to_byte::<StringViewType, GenericStringType<i32>>(array),
(Utf8View, LargeUtf8) => cast_view_to_byte::<StringViewType, GenericStringType<i64>>(array),
(BinaryView, Binary) => cast_view_to_byte::<BinaryViewType, GenericBinaryType<i32>>(array),
(BinaryView, LargeBinary) => {
cast_view_to_byte::<BinaryViewType, GenericBinaryType<i64>>(array)
}
(from_type, LargeUtf8) if from_type.is_primitive() => {
value_to_string::<i64>(array, cast_options)
}
@@ -2299,6 +2307,32 @@ where
}))
}

/// Converts a `ByteViewType` array into the matching offset-based
/// `ByteArrayType` array (for example `Utf8View` -> `Utf8`/`LargeUtf8`, or
/// `BinaryView` -> `Binary`/`LargeBinary`).
///
/// `array` is reinterpreted as a `GenericByteViewArray<FROM>` through its
/// `ArrayData`. Every slot (including nulls) is then appended, in order, to a
/// `GenericByteBuilder<TO>`, and the finished array is returned behind an `Arc`.
fn cast_view_to_byte<FROM, TO>(array: &dyn Array) -> Result<ArrayRef, ArrowError>
where
    FROM: ByteViewType,
    TO: ByteArrayType,
    FROM::Native: AsRef<TO::Native>,
{
    let source = GenericByteViewArray::<FROM>::from(array.to_data());

    // Total of the `length` field of every view; handed to the builder as the
    // capacity of its value buffer.
    let total_value_bytes: usize = source
        .views()
        .iter()
        .map(|raw_view| ByteView::from(*raw_view).length as usize)
        .sum();

    let mut builder = GenericByteBuilder::<TO>::with_capacity(source.len(), total_value_bytes);
    source.iter().for_each(|value| builder.append_option(value));

    Ok(Arc::new(builder.finish()))
}

#[cfg(test)]
mod tests {
use arrow_buffer::{Buffer, NullBuffer};
@@ -5169,6 +5203,82 @@ mod tests {
assert_eq!(binary_view_array.as_ref(), &expect_binary_view_array);
}

// Runs the generic view-to-string cast check once per offset type
// (`i32` and `i64`) by instantiating the `_test_view_to_string` helper.
#[test]
fn test_view_to_string() {
_test_view_to_string::<i32>();
_test_view_to_string::<i64>();
}

/// Builds a `StringViewArray` from a fixed set of optional strings, casts it to
/// `GenericStringArray<O>`'s data type, and checks that the cast is reported as
/// supported and yields exactly the array built directly from the same values.
fn _test_view_to_string<O: OffsetSizeTrait>() {
    // Mix of short values, a null, and one value longer than 12 bytes.
    let values: Vec<Option<&str>> = vec![
        Some("hello"),
        Some("world"),
        None,
        Some("large payload over 12 bytes"),
        Some("lulu"),
    ];

    // Source array: ["hello", "world", null, "large payload over 12 bytes", "lulu"]
    let mut view_builder = StringViewBuilder::new().with_block_size(8); // multiple buffers.
    values
        .iter()
        .for_each(|value| view_builder.append_option(*value));
    let source = view_builder.finish();

    // Reference result, built straight from the same values.
    let expected = GenericStringArray::<O>::from(values);
    let target_type = expected.data_type();

    assert!(can_cast_types(source.data_type(), target_type));

    let casted = cast(&source, target_type).unwrap();
    assert_eq!(casted.data_type(), target_type);

    assert_eq!(casted.as_ref(), &expected);
}

// Runs the generic view-to-binary cast check once per offset type
// (`i32` and `i64`) by instantiating the `_test_view_to_binary` helper.
#[test]
fn test_view_to_binary() {
_test_view_to_binary::<i32>();
_test_view_to_binary::<i64>();
}

/// Builds a `BinaryViewArray` from a fixed set of optional byte strings, casts
/// it to `GenericBinaryArray<O>`'s data type, and checks that the cast is
/// reported as supported and yields exactly the array built directly from the
/// same values.
fn _test_view_to_binary<O: OffsetSizeTrait>() {
    // Mix of short values, a null, and one value longer than 12 bytes.
    let values: Vec<Option<&[u8]>> = vec![
        Some(b"hello"),
        Some(b"world"),
        None,
        Some(b"large payload over 12 bytes"),
        Some(b"lulu"),
    ];

    // Source array: ["hello", "world", null, "large payload over 12 bytes", "lulu"]
    let mut view_builder = BinaryViewBuilder::new().with_block_size(8); // multiple buffers.
    values
        .iter()
        .for_each(|value| view_builder.append_option(*value));
    let source = view_builder.finish();

    // Reference result, built straight from the same values.
    let expected = GenericBinaryArray::<O>::from(values);
    let target_type = expected.data_type();

    assert!(can_cast_types(source.data_type(), target_type));

    let casted = cast(&source, target_type).unwrap();
    assert_eq!(casted.data_type(), target_type);

    assert_eq!(casted.as_ref(), &expected);
}

#[test]
fn test_cast_from_f64() {
let f64_values: Vec<f64> = vec![
3 changes: 2 additions & 1 deletion parquet/src/format.rs

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions parquet/src/lib.rs
Original file line number Diff line number Diff line change
@@ -107,6 +107,8 @@ pub mod basic;
/// Automatically generated code for reading parquet thrift definition.
// see parquet/CONTRIBUTING.md for instructions on regenerating
#[allow(clippy::derivable_impls, clippy::match_single_binding)]
// Don't try to format auto-generated code
#[rustfmt::skip]
pub mod format;

#[macro_use]