Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: BitPackedArray ptype changed by compute funcs #1724

Merged
merged 8 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion bench-vortex/src/bin/clickbench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,9 @@ fn main() {
for _ in 0..args.iterations {
let exec_duration = runtime.block_on(async {
let start = Instant::now();
execute_query(&context, &query).await.unwrap();
execute_query(&context, &query)
.await
.unwrap_or_else(|e| panic!("executing query {query_idx}: {e}"));
start.elapsed()
});

Expand Down
2 changes: 1 addition & 1 deletion encodings/fastlanes/src/bitpacking/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pub fn unpack(array: BitPackedArray) -> VortexResult<PrimitiveArray> {
let length = array.len();
let offset = array.offset() as usize;
let ptype = array.ptype();
let mut unpacked = match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |$P| {
let mut unpacked = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |$P| {
PrimitiveArray::from_vec(
unpack_primitive::<$P>(array.packed_slice::<$P>(), bit_width, offset, length),
array.validity(),
Expand Down
3 changes: 0 additions & 3 deletions encodings/fastlanes/src/bitpacking/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,9 +178,6 @@ mod test {

#[test]
fn filter_bitpacked_signed() {
// Elements 0..=499 are negative integers (patches)
// Element 500 = 0 (packed)
// Elements 501..999 are positive integers (packed)
let values: Vec<i64> = (0..500).collect_vec();
let unpacked = PrimitiveArray::from(values.clone());
let bitpacked = BitPackedArray::encode(unpacked.as_ref(), 9).unwrap();
Expand Down
32 changes: 29 additions & 3 deletions encodings/fastlanes/src/bitpacking/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ impl TakeFn<BitPackedArray> for BitPackedEncoding {

// NOTE: we use the unsigned PType because all values in the BitPackedArray must
// be non-negative (pre-condition of creating the BitPackedArray).
let ptype: PType = PType::try_from(array.dtype())?.to_unsigned();
let ptype: PType = PType::try_from(array.dtype())?;
let validity = array.validity();
let taken_validity = validity.take(indices)?;

let indices = indices.clone().into_primitive()?;
let taken = match_each_unsigned_integer_ptype!(ptype, |$T| {
let taken = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |$T| {
match_each_integer_ptype!(indices.ptype(), |$I| {
take_primitive::<$T, $I>(array, &indices, taken_validity)?
})
Expand Down Expand Up @@ -107,7 +107,11 @@ fn take_primitive<T: NativePType + BitPacking, I: NativePType>(
}
});

let unpatched_taken = PrimitiveArray::from_vec(output, taken_validity);
let mut unpatched_taken = PrimitiveArray::from_vec(output, taken_validity);
// Flip back to signed type before patching.
if array.ptype().is_signed_int() {
unpatched_taken = unpatched_taken.reinterpret_cast(array.ptype());
}
if let Some(patches) = array.patches() {
if let Some(patches) = patches.take(&indices.to_array())? {
return unpatched_taken.patch(patches);
Expand All @@ -125,6 +129,7 @@ mod test {
use rand::{thread_rng, Rng};
use vortex_array::array::PrimitiveArray;
use vortex_array::compute::{scalar_at, slice, take};
use vortex_array::validity::Validity;
use vortex_array::{IntoArrayData, IntoArrayVariant};

use crate::BitPackedArray;
Expand Down Expand Up @@ -210,4 +215,25 @@ mod test {
);
});
}

#[test]
#[cfg_attr(miri, ignore)]
fn take_signed_with_patches() {
let start = BitPackedArray::encode(
&PrimitiveArray::from(vec![1i32, 2i32, 3i32, 4i32]).into_array(),
1,
)
.unwrap();

let taken_primitive = super::take_primitive::<u32, u64>(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually test blackbox, but had to go whitebox here since the take impl has a configurable UNPACK_THRESHOLD and I didn't want us to stop testing this codepath in case we changed the constant.

This test was able to repro the clickbench error before the fix above to reinterpret_cast before patching

&start,
&PrimitiveArray::from(vec![0u64, 1, 2, 3]),
Validity::NonNullable,
)
.unwrap();
assert_eq!(
taken_primitive.into_maybe_null_slice::<i32>(),
vec![1i32, 2, 3, 4]
);
}
}
34 changes: 32 additions & 2 deletions encodings/fastlanes/src/bitpacking/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
use std::fmt::{Debug, Display};
use std::sync::Arc;

use ::serde::{Deserialize, Serialize};
pub use compress::*;
use fastlanes::BitPacking;
use ::serde::{Deserialize, Serialize};
use vortex_array::array::PrimitiveArray;
use vortex_array::encoding::ids;
use vortex_array::patches::{Patches, PatchesMetadata};
Expand Down Expand Up @@ -100,6 +100,17 @@ impl BitPackedArray {
);
}

if let Some(ref patches) = patches {
// Ensure that array and patches have same PType
if patches.dtype().as_nonnullable() != DType::from(ptype).as_nonnullable() {
a10y marked this conversation as resolved.
Show resolved Hide resolved
vortex_bail!(
"Patches DType {} does not match BitPackedArray dtype {}",
patches.dtype().as_nonnullable(),
ptype
)
}
}

// expected packed size is in bytes
let expected_packed_size =
((length + offset as usize + 1023) / 1024) * (128 * bit_width as usize);
Expand Down Expand Up @@ -276,8 +287,9 @@ impl PrimitiveArrayTrait for BitPackedArray {}

#[cfg(test)]
mod test {
use itertools::Itertools;
use vortex_array::array::PrimitiveArray;
use vortex_array::{IntoArrayData, IntoArrayVariant};
use vortex_array::{IntoArrayData, IntoArrayVariant, IntoCanonical};

use crate::BitPackedArray;

Expand Down Expand Up @@ -305,4 +317,22 @@ mod test {
let _packed = BitPackedArray::encode(uncompressed.as_ref(), 9)
.expect_err("Cannot pack value into larger width");
}

#[test]
fn signed_with_patches() {
let values = (0i32..=512).collect_vec();
let parray = PrimitiveArray::from(values.clone()).into_array();

let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap();
assert!(packed_with_patches.patches().is_some());
assert_eq!(
packed_with_patches
.into_canonical()
.unwrap()
.into_primitive()
.unwrap()
.into_maybe_null_slice::<i32>(),
values
);
}
}
41 changes: 29 additions & 12 deletions vortex-array/src/array/primitive/patch.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,50 @@
use itertools::Itertools;
use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype};
use arrow_buffer::ArrowNativeType;
use vortex_dtype::{match_each_integer_ptype, match_each_native_ptype, NativePType};
use vortex_error::VortexResult;

use crate::array::PrimitiveArray;
use crate::patches::Patches;
use crate::validity::Validity;
use crate::variants::PrimitiveArrayTrait;
use crate::{ArrayLen, IntoArrayVariant};

impl PrimitiveArray {
#[allow(clippy::cognitive_complexity)]
pub fn patch(self, patches: Patches) -> VortexResult<Self> {
let (_, indices, values) = patches.into_parts();
let indices = indices.into_primitive()?;
let values = values.into_primitive()?;
let (_, patch_indices, patch_values) = patches.into_parts();
let patch_indices = patch_indices.into_primitive()?;
let patch_values = patch_values.into_primitive()?;

let patched_validity =
self.validity()
.patch(self.len(), indices.as_ref(), values.validity())?;
.patch(self.len(), patch_indices.as_ref(), patch_values.validity())?;

match_each_integer_ptype!(indices.ptype(), |$I| {
match_each_integer_ptype!(patch_indices.ptype(), |$I| {
match_each_native_ptype!(self.ptype(), |$T| {
let mut own_values = self.into_maybe_null_slice::<$T>();
for (idx, value) in indices.into_maybe_null_slice::<$I>().into_iter().zip_eq(values.into_maybe_null_slice::<$T>().into_iter()) {
own_values[idx as usize] = value;
}
Ok(Self::from_vec(own_values, patched_validity))
self.patch_typed::<$T, $I>(patch_indices, patch_values, patched_validity)
})
})
}

fn patch_typed<T, I>(
self,
patch_indices: PrimitiveArray,
patch_values: PrimitiveArray,
patched_validity: Validity,
) -> VortexResult<Self>
where
T: NativePType + ArrowNativeType,
I: NativePType + ArrowNativeType,
{
let mut own_values = self.into_maybe_null_slice::<T>();

let patch_indices = patch_indices.into_maybe_null_slice::<I>();
let patch_values = patch_values.into_maybe_null_slice::<T>();
for (idx, value) in itertools::zip_eq(patch_indices, patch_values) {
own_values[idx.as_usize()] = value;
}
Ok(Self::from_vec(own_values, patched_validity))
}
}

#[cfg(test)]
Expand Down