Skip to content

Commit

Permalink
run clippy and fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
rluvaton committed Dec 12, 2024
1 parent dbd1fdd commit a09615a
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 76 deletions.
106 changes: 66 additions & 40 deletions arrow-array/src/builder/generic_bytes_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,9 +311,12 @@ where
///
/// when dictionary values are null (the actual mapped values) the keys are null
///
pub fn extend_dictionary(&mut self, dictionary: &TypedDictionaryArray<K, GenericByteArray<T>>) -> Result<(), ArrowError> {
pub fn extend_dictionary(
&mut self,
dictionary: &TypedDictionaryArray<K, GenericByteArray<T>>,
) -> Result<(), ArrowError> {
let values = dictionary.values();

let v_len = values.len();
let k_len = dictionary.keys().len();
if v_len == 0 && k_len == 0 {
Expand All @@ -327,33 +330,33 @@ where
}

if k_len == 0 {
return Err(ArrowError::InvalidArgumentError("Dictionary keys should not be empty when values are not empty".to_string()));
return Err(ArrowError::InvalidArgumentError(
"Dictionary keys should not be empty when values are not empty".to_string(),
));
}

// Orphan values will be carried over to the new dictionary
let mapped_values = values.iter()
let mapped_values = values
.iter()
// Dictionary values can technically be null, so we need to handle that
.map(|dict_value| dict_value
.map(|dict_value| self.get_or_insert_key(dict_value))
.transpose())
.map(|dict_value| {
dict_value
.map(|dict_value| self.get_or_insert_key(dict_value))
.transpose()
})
.collect::<Result<Vec<_>, _>>()?;

// Just insert the keys without additional lookups
dictionary
.keys()
.iter()
.for_each(|key| {
match key {
dictionary.keys().iter().for_each(|key| match key {
None => self.append_null(),
Some(original_dict_index) => {
let index = original_dict_index.as_usize().min(v_len - 1);
match mapped_values[index] {
None => self.append_null(),
Some(original_dict_index) => {
let index = original_dict_index.as_usize().min(v_len - 1);
match mapped_values[index] {
None => self.append_null(),
Some(mapped_value) => self.keys_builder.append_value(mapped_value),
}
}
Some(mapped_value) => self.keys_builder.append_value(mapped_value),
}
});
}
});

Ok(())
}
Expand Down Expand Up @@ -732,20 +735,39 @@ mod tests {

let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
builder.extend(["e", "e", "f", "e", "d"].into_iter().map(Some));
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 6);

let values = dict
.downcast_dict::<GenericByteArray<Utf8Type>>()
.unwrap()
.into_iter()
.collect::<Vec<_>>();

assert_eq!(
values,
[Some("e"), Some("e"), Some("f"), Some("e"), Some("d"), Some("a"), Some("b"), Some("c"), Some("a"), Some("b"), Some("c"), None, Some("c"), Some("d"), Some("a"), None]
[
Some("e"),
Some("e"),
Some("f"),
Some("e"),
Some("d"),
Some("a"),
Some("b"),
Some("c"),
Some("a"),
Some("b"),
Some("c"),
None,
Some("c"),
Some("d"),
Some("a"),
None
]
);
}
#[test]
Expand All @@ -763,7 +785,10 @@ mod tests {
let values = values_builder.finish();
let keys = keys_builder.finish();

let data_type = DataType::Dictionary(Box::new(Int32Type::DATA_TYPE), Box::new(Utf8Type::DATA_TYPE));
let data_type = DataType::Dictionary(
Box::new(Int32Type::DATA_TYPE),
Box::new(Utf8Type::DATA_TYPE),
);

let builder = keys
.into_data()
Expand All @@ -775,10 +800,15 @@ mod tests {
};

let some_dict_values = some_dict.values().as_string::<i32>();
assert_eq!(some_dict_values.into_iter().collect::<Vec<_>>(), &[None, Some("I like worm hugs")]);
assert_eq!(
some_dict_values.into_iter().collect::<Vec<_>>(),
&[None, Some("I like worm hugs")]
);

let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 1);
Expand All @@ -789,10 +819,7 @@ mod tests {
.into_iter()
.collect::<Vec<_>>();

assert_eq!(
values,
[None, Some("I like worm hugs")]
);
assert_eq!(values, [None, Some("I like worm hugs")]);
}

#[test]
Expand All @@ -804,20 +831,19 @@ mod tests {
};

let mut builder = GenericByteDictionaryBuilder::<Int32Type, Utf8Type>::new();
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 0);

let values = dict
.downcast_dict::<GenericByteArray<Utf8Type>>()
.unwrap()
.into_iter()
.collect::<Vec<_>>();

assert_eq!(
values,
[None, None]
);

assert_eq!(values, [None, None]);
}
}
104 changes: 68 additions & 36 deletions arrow-array/src/builder/primitive_dictionary_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@

use crate::builder::{ArrayBuilder, PrimitiveBuilder};
use crate::types::ArrowDictionaryKeyType;
use crate::{Array, ArrayRef, ArrowPrimitiveType, DictionaryArray, PrimitiveArray, TypedDictionaryArray};
use crate::{
Array, ArrayRef, ArrowPrimitiveType, DictionaryArray, PrimitiveArray, TypedDictionaryArray,
};
use arrow_buffer::{ArrowNativeType, ToByteSlice};
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
Expand Down Expand Up @@ -309,7 +311,10 @@ where
///
/// when dictionary values are null (the actual mapped values) the keys are null
///
pub fn extend_dictionary(&mut self, dictionary: &TypedDictionaryArray<K, PrimitiveArray<V>>) -> Result<(), ArrowError> {
pub fn extend_dictionary(
&mut self,
dictionary: &TypedDictionaryArray<K, PrimitiveArray<V>>,
) -> Result<(), ArrowError> {
let values = dictionary.values();

let v_len = values.len();
Expand All @@ -325,33 +330,33 @@ where
}

if k_len == 0 {
return Err(ArrowError::InvalidArgumentError("Dictionary keys should not be empty when values are not empty".to_string()));
return Err(ArrowError::InvalidArgumentError(
"Dictionary keys should not be empty when values are not empty".to_string(),
));
}

// Orphan values will be carried over to the new dictionary
let mapped_values = values.iter()
let mapped_values = values
.iter()
// Dictionary values can technically be null, so we need to handle that
.map(|dict_value| dict_value
.map(|dict_value| self.get_or_insert_key(dict_value))
.transpose())
.map(|dict_value| {
dict_value
.map(|dict_value| self.get_or_insert_key(dict_value))
.transpose()
})
.collect::<Result<Vec<_>, _>>()?;

// Just insert the keys without additional lookups
dictionary
.keys()
.iter()
.for_each(|key| {
match key {
dictionary.keys().iter().for_each(|key| match key {
None => self.append_null(),
Some(original_dict_index) => {
let index = original_dict_index.as_usize().min(v_len - 1);
match mapped_values[index] {
None => self.append_null(),
Some(original_dict_index) => {
let index = original_dict_index.as_usize().min(v_len - 1);
match mapped_values[index] {
None => self.append_null(),
Some(mapped_value) => self.keys_builder.append_value(mapped_value),
}
}
Some(mapped_value) => self.keys_builder.append_value(mapped_value),
}
});
}
});

Ok(())
}
Expand Down Expand Up @@ -421,7 +426,7 @@ impl<K: ArrowDictionaryKeyType, P: ArrowPrimitiveType> Extend<Option<P::Native>>
mod tests {
use super::*;

use crate::array::{UInt32Array, UInt8Array, Int32Array};
use crate::array::{Int32Array, UInt32Array, UInt8Array};
use crate::builder::Decimal128Builder;
use crate::cast::AsArray;
use crate::types::{Decimal128Type, Int32Type, UInt32Type, UInt8Type};
Expand Down Expand Up @@ -510,7 +515,9 @@ mod tests {

let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
builder.extend([6, 6, 7, 6, 5].into_iter().map(Some));
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 7);
Expand All @@ -523,7 +530,29 @@ mod tests {

assert_eq!(
values,
[Some(6), Some(6), Some(7), Some(6), Some(5), Some(1), Some(2), Some(3), Some(1), Some(2), Some(3), Some(1), Some(2), Some(3), None, Some(4), Some(5), Some(1), Some(3), Some(1), None]
[
Some(6),
Some(6),
Some(7),
Some(6),
Some(5),
Some(1),
Some(2),
Some(3),
Some(1),
Some(2),
Some(3),
Some(1),
Some(2),
Some(3),
None,
Some(4),
Some(5),
Some(1),
Some(3),
Some(1),
None
]
);
}

Expand All @@ -542,8 +571,10 @@ mod tests {
let values = values_builder.finish();
let keys = keys_builder.finish();

let data_type =
DataType::Dictionary(Box::new(Int32Type::DATA_TYPE), Box::new(values.data_type().clone()));
let data_type = DataType::Dictionary(
Box::new(Int32Type::DATA_TYPE),
Box::new(values.data_type().clone()),
);

let builder = keys
.into_data()
Expand All @@ -555,10 +586,15 @@ mod tests {
};

let some_dict_values = some_dict.values().as_primitive::<Int32Type>();
assert_eq!(some_dict_values.into_iter().collect::<Vec<_>>(), &[None, Some(42)]);
assert_eq!(
some_dict_values.into_iter().collect::<Vec<_>>(),
&[None, Some(42)]
);

let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 1);
Expand All @@ -569,10 +605,7 @@ mod tests {
.into_iter()
.collect::<Vec<_>>();

assert_eq!(
values,
[None, Some(42)]
);
assert_eq!(values, [None, Some(42)]);
}

#[test]
Expand All @@ -584,7 +617,9 @@ mod tests {
};

let mut builder = PrimitiveDictionaryBuilder::<Int32Type, Int32Type>::new();
builder.extend_dictionary(&some_dict.downcast_dict().unwrap()).unwrap();
builder
.extend_dictionary(&some_dict.downcast_dict().unwrap())
.unwrap();
let dict = builder.finish();

assert_eq!(dict.values().len(), 0);
Expand All @@ -595,9 +630,6 @@ mod tests {
.into_iter()
.collect::<Vec<_>>();

assert_eq!(
values,
[None, None]
);
assert_eq!(values, [None, None]);
}
}

0 comments on commit a09615a

Please sign in to comment.