Skip to content

Commit

Permalink
expose get_or_insert_key and some optimization function for dict
Browse files Browse the repository at this point in the history
  • Loading branch information
rluvaton committed Dec 12, 2024
1 parent fcf4aa4 commit 164d709
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 14 deletions.
78 changes: 71 additions & 7 deletions arrow-array/src/builder/generic_bytes_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,16 @@ where
K: ArrowDictionaryKeyType,
T: ByteArrayType,
{
fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
/// Only insert value **without inserting key** and get the key of that value (value index)
///
/// This should be used for user optimization
///
/// Returns an error if the new index would overflow the key type.
///
/// # Safety
/// This function is unsafe as calling this without calling one of the `append_key` functions will result in an undefined behavior
///
pub unsafe fn get_or_insert_key(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
let value_native: &T::Native = value.as_ref();
let value_bytes: &[u8] = value_native.as_ref();

Expand All @@ -222,15 +231,68 @@ where
Ok(key)
}


/// Only insert key that uses a value in a specific index
///
/// This should be used for user optimization
///
/// # Panic
/// this will panic if the value index is out of bounds
///
pub fn append_key_for_existing_value(&mut self, value_index: K::Native) {
assert!(value_index.as_usize() < self.values_builder.len(), "value_index is outside values bound");
unsafe {
self.append_key_for_existing_value_unchecked(value_index);
}
}

/// Only insert key that uses a value in a specific index without checking if `value_index` is in the bounds
///
/// This should be used for user optimization
///
/// # Safety
/// The user must ensure that the value index is valid
///
pub unsafe fn append_key_for_existing_value_unchecked(&mut self, value_index: K::Native) {
self.keys_builder.append_value(value_index);
}

/// Only insert key that uses a value in a specific index N times
///
/// This should be used for user optimization
///
/// # Panic
/// this will panic if the value index is out of bounds
///
pub fn append_key_n_for_existing_value(&mut self, value_index: K::Native, count: usize) {
assert!(value_index.as_usize() < self.values_builder.len(), "value_index is outside values bound");
unsafe {
self.append_key_n_for_existing_value_unchecked(value_index, count);
}
}

/// Only insert key that uses a value in a specific index `N` times without checking if `value_index` is in the bounds
///
/// This should be used for user optimization
///
/// # Safety
/// The user must ensure that the value index is valid
///
pub unsafe fn append_key_n_for_existing_value_unchecked(&mut self, value_index: K::Native, count: usize) {
self.keys_builder.append_value_n(value_index, count);
}

/// Append a value to the array. Return an existing index
/// if already present in the values array or a new index if the
/// value is appended to the values array.
///
/// Returns an error if the new index would overflow the key type.
pub fn append(&mut self, value: impl AsRef<T::Native>) -> Result<K::Native, ArrowError> {
let key = self.get_or_insert_key(value)?;
self.keys_builder.append_value(key);
Ok(key)
unsafe {
let key = self.get_or_insert_key(value)?;
self.append_key_for_existing_value_unchecked(key);
Ok(key)
}
}

/// Append a value multiple times to the array.
Expand All @@ -242,9 +304,11 @@ where
value: impl AsRef<T::Native>,
count: usize,
) -> Result<K::Native, ArrowError> {
let key = self.get_or_insert_key(value)?;
self.keys_builder.append_value_n(key, count);
Ok(key)
unsafe {
let key = self.get_or_insert_key(value)?;
self.append_key_n_for_existing_value_unchecked(key, count);
Ok(key)
}
}

/// Infallibly append a value to this builder
Expand Down
79 changes: 72 additions & 7 deletions arrow-array/src/builder/primitive_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,18 @@ where
K: ArrowDictionaryKeyType,
V: ArrowPrimitiveType,
{

/// Only insert value **without inserting key** and get the key of that value (value index)
///
/// This should be used for user optimization
///
/// Returns an error if the new index would overflow the key type.
///
/// # Safety
/// This function is unsafe as calling this without calling one of the `append_key` functions will result in an undefined behavior
///
#[inline]
fn get_or_insert_key(&mut self, value: V::Native) -> Result<K::Native, ArrowError> {
pub unsafe fn get_or_insert_key(&mut self, value: V::Native) -> Result<K::Native, ArrowError> {
match self.map.get(&Value(value)) {
Some(&key) => {
Ok(K::Native::from_usize(key).ok_or(ArrowError::DictionaryKeyOverflowError)?)
Expand All @@ -224,24 +234,79 @@ where
}
}


/// Only insert key that uses a value in a specific index
///
/// This should be used for user optimization
///
/// # Panic
/// this will panic if the value index is out of bounds
///
pub fn append_key_for_existing_value(&mut self, value_index: K::Native) {
assert!(value_index.as_usize() < self.values_builder.len(), "value_index is outside values bound");
unsafe {
self.append_key_for_existing_value_unchecked(value_index);
}
}

/// Only insert key that uses a value in a specific index without checking if `value_index` is in the bounds
///
/// This should be used for user optimization
///
/// # Safety
/// The user must ensure that the value index is valid
///
pub unsafe fn append_key_for_existing_value_unchecked(&mut self, value_index: K::Native) {
self.keys_builder.append_value(value_index);
}

/// Only insert key that uses a value in a specific index N times
///
/// This should be used for user optimization
///
/// # Panic
/// this will panic if the value index is out of bounds
///
pub fn append_key_n_for_existing_value(&mut self, value_index: K::Native, count: usize) {
assert!(value_index.as_usize() < self.values_builder.len(), "value_index is outside values bound");
unsafe {
self.append_key_n_for_existing_value_unchecked(value_index, count);
}
}

/// Only insert key that uses a value in a specific index `N` times without checking if `value_index` is in the bounds
///
/// This should be used for user optimization
///
/// # Safety
/// The user must ensure that the value index is valid
///
pub unsafe fn append_key_n_for_existing_value_unchecked(&mut self, value_index: K::Native, count: usize) {
self.keys_builder.append_value_n(value_index, count);
}

/// Append a primitive value to the array. Return an existing index
/// if already present in the values array or a new index if the
/// value is appended to the values array.
#[inline]
pub fn append(&mut self, value: V::Native) -> Result<K::Native, ArrowError> {
let key = self.get_or_insert_key(value)?;
self.keys_builder.append_value(key);
Ok(key)
unsafe {
let key = self.get_or_insert_key(value)?;
self.append_key_for_existing_value_unchecked(key);
Ok(key)
}
}

/// Append a value multiple times to the array.
/// This is the same as `append` but allows to append the same value multiple times without doing multiple lookups.
///
/// Returns an error if the new index would overflow the key type.
pub fn append_n(&mut self, value: V::Native, count: usize) -> Result<K::Native, ArrowError> {
let key = self.get_or_insert_key(value)?;
self.keys_builder.append_value_n(key, count);
Ok(key)
unsafe {
let key = self.get_or_insert_key(value)?;
self.append_key_n_for_existing_value_unchecked(key, count);
Ok(key)
}
}

/// Infallibly append a value to this builder
Expand Down

0 comments on commit 164d709

Please sign in to comment.