Skip to content

Commit

Permalink
Add support for tinystr, rust_decimal, and glam types
Browse files Browse the repository at this point in the history
- Implement Arrow serialization/deserialization for TinyAsciiStr, Decimal, and glam vector/matrix types
- Add new optional dependencies and features
  • Loading branch information
Swoorup committed Oct 1, 2024
1 parent 6e31682 commit ce01e61
Show file tree
Hide file tree
Showing 11 changed files with 607 additions and 4 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,6 @@ quote = "1"
syn = "2"
trybuild = "1.0"
pretty_assertions = "1.4"
tinystr = "0.7"
rust_decimal = "1.36"
glam = "0.29"
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ Default implementations of the above traits are provided for the following:
- Large Arrow types [`LargeBinary`], [`LargeString`], [`LargeList`] are supported via the `type` attribute. Please see the [complex_example.rs](./arrow_convert/tests/complex_example.rs) for usage.
- Fixed size types [`FixedSizeBinary`], [`FixedSizeList`] are supported via the `FixedSizeVec` type override.
- Note: nesting of [`FixedSizeList`] is not supported.
- `TinyAsciiStr` from the [tinystr](https://github.com/zbraniecki/tinystr) crate (with the `tinystr` feature enabled)
- `Decimal` from the [rust_decimal](https://github.com/paupino/rust-decimal) crate (with the `rust_decimal` feature enabled)
- Vector and matrix types from the [glam](https://github.com/bitshifter/glam-rs) crate (with the `glam` feature enabled):
- `Vec2`, `Vec3`, `Vec4`
- `DVec2`, `DVec3`, `DVec4`
- `BVec2`, `BVec3`, `BVec4`
- `Mat2`, `Mat3`, `Mat4`
- `DMat2`, `DMat3`, `DMat4`

### Enums

Expand Down
20 changes: 16 additions & 4 deletions arrow_convert/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,35 @@ keywords.workspace = true
repository.workspace = true
description = "Convert between nested rust types and Arrow with arrow"

[features]
default = ["derive"]

derive = ["arrow_convert_derive"]
tinystr = ["dep:tinystr"]
rust_decimal = ["dep:rust_decimal"]
glam = ["dep:glam"]

[dependencies]
arrow = { workspace = true }
arrow_convert_derive = { workspace = true, optional = true }
half = { workspace = true }
chrono = { workspace = true, features = ["std"] }
err-derive = { workspace = true }

# optional deps
tinystr = { workspace = true, optional = true }
rust_decimal = { workspace = true, optional = true }
glam = { workspace = true, optional = true }

[dev-dependencies]
arrow_convert_derive = { workspace = true }
glam = { workspace = true }
tinystr = { workspace = true }
rust_decimal = { workspace = true }
criterion = { workspace = true }
trybuild = { workspace = true }
pretty_assertions = { workspace = true }

[features]
default = ["derive"]
derive = ["arrow_convert_derive"]

[lib]
bench = false

Expand Down
172 changes: 172 additions & 0 deletions arrow_convert/src/features/glam.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
use arrow::datatypes::DataType;

use crate::arrow_enable_vec_for_type;
use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;
use arrow::datatypes::Field;

use crate::deserialize::arrow_deserialize_vec_helper;
use arrow::array::ArrayRef;
use arrow::array::{BooleanBuilder, Float32Builder, Float64Builder};
use arrow::array::{FixedSizeListArray, FixedSizeListBuilder};
use std::sync::Arc;

/// Generates the `ArrowField`, `ArrowSerialize` and `ArrowDeserialize` impls for one `glam` type.
///
/// Every value is written as a single entry of a `FixedSizeList` whose child field is a
/// non-nullable field named `"scalar"`.
///
/// Arguments, in order:
/// - `$type`: the `glam` vector or matrix type that receives the impls.
/// - `$size`: the fixed list length, i.e. the number of elements stored per value.
/// - `$dt`: the Rust element type (e.g. `bool`, `f32`); it supplies the child builder and drives
///   element deserialization.
/// - `$arrow_dt`: the Arrow `DataType` of a single element.
/// - `$array_builder`: the Arrow builder type used for the elements.
/// - `$se`: a closure mapping `&$type` to a value whose `as_ref()` yields the element slice.
/// - `$de`: a closure rebuilding `$type` from the deserialized `Vec` of elements.
macro_rules! impl_glam_ty {
    ($type:ty, $size:expr, $dt:ident, $arrow_dt:expr, $array_builder:ident, $se:expr, $de:expr) => {
        impl ArrowField for $type {
            type Type = Self;

            fn data_type() -> DataType {
                let element = Arc::new(Field::new("scalar", $arrow_dt, false));
                DataType::FixedSizeList(element, $size)
            }
        }

        arrow_enable_vec_for_type!($type);

        impl ArrowSerialize for $type {
            type ArrayBuilderType = FixedSizeListBuilder<$array_builder>;

            fn new_array() -> Self::ArrayBuilderType {
                // The child field must mirror the one declared in `data_type`.
                let element = Field::new("scalar", $arrow_dt, false);
                Self::ArrayBuilderType::new(<$dt as ArrowSerialize>::new_array(), $size).with_field(element)
            }

            fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
                let elements = $se(v);

                // Push all elements into the child builder, then close the list entry as valid.
                array.values().append_slice(elements.as_ref());
                array.append(true);
                Ok(())
            }
        }

        impl ArrowDeserialize for $type {
            type ArrayType = FixedSizeListArray;

            fn arrow_deserialize(v: Option<ArrayRef>) -> Option<Self> {
                let elements = arrow_deserialize_vec_helper::<$dt>(v)?;
                Some($de(elements))
            }
        }
    };
}

/// Implements the `ArrowSerialize` and `ArrowDeserialize` traits for a `glam` boolean vector
/// type (`glam::BVec2`, `glam::BVec3`, `glam::BVec4`).
///
/// - `$type`: the boolean vector type.
/// - `$size`: the number of lanes in the vector.
///
/// # Panics
///
/// The generated deserializer panics when the decoded element list does not contain exactly
/// `$size` values.
macro_rules! impl_glam_vec_bool {
    ($type:ty, $size:expr) => {
        impl_glam_ty!(
            $type,
            $size,
            bool,
            DataType::Boolean,
            BooleanBuilder,
            |v: &$type| <[bool; $size]>::from(*v),
            |v: Vec<bool>| {
                // On a length mismatch `try_from` hands the vector back, so its length is still
                // available for the diagnostic.
                match <[bool; $size]>::try_from(v) {
                    Ok(array) => Self::from_array(array),
                    // Argument order follows the placeholders: expected size, type name, actual length.
                    Err(rejected) => panic!(
                        "Expected size of {} deserializing array of type `{}`, got {}",
                        $size,
                        std::any::type_name::<$type>(),
                        rejected.len()
                    ),
                }
            }
        );
    };
}

/// Implements the `ArrowSerialize` and `ArrowDeserialize` traits for a `glam` `f32` vector type
/// (`glam::Vec2`, `glam::Vec3`, `glam::Vec4`).
///
/// Components are stored as `Float32` elements and the vector is rebuilt with `from_slice`.
macro_rules! impl_glam_vec_f32 {
    ($vec:ty, $len:expr) => {
        impl_glam_ty!(
            $vec,
            $len,
            f32,
            DataType::Float32,
            Float32Builder,
            |value: &$vec| *value,
            |components: Vec<f32>| Self::from_slice(&components)
        );
    };
}

/// Implements the `ArrowSerialize` and `ArrowDeserialize` traits for a `glam` `f32` matrix type
/// (`glam::Mat2`, `glam::Mat3`, `glam::Mat4`).
///
/// Elements are stored as `Float32` values and the matrix is rebuilt with `from_cols_slice`.
macro_rules! impl_glam_mat_f32 {
    ($mat:ty, $len:expr) => {
        impl_glam_ty!(
            $mat,
            $len,
            f32,
            DataType::Float32,
            Float32Builder,
            |value: &$mat| *value,
            |elements: Vec<f32>| Self::from_cols_slice(&elements)
        );
    };
}

/// Implements the `ArrowSerialize` and `ArrowDeserialize` traits for a `glam` `f64` vector type
/// (`glam::DVec2`, `glam::DVec3`, `glam::DVec4`).
///
/// Components are stored as `Float64` elements and the vector is rebuilt with `from_slice`.
macro_rules! impl_glam_vec_f64 {
    ($vec:ty, $len:expr) => {
        impl_glam_ty!(
            $vec,
            $len,
            f64,
            DataType::Float64,
            Float64Builder,
            |value: &$vec| *value,
            |components: Vec<f64>| Self::from_slice(&components)
        );
    };
}

/// Implements the `ArrowSerialize` and `ArrowDeserialize` traits for a `glam` `f64` matrix type
/// (`glam::DMat2`, `glam::DMat3`, `glam::DMat4`).
///
/// Elements are stored as `Float64` values and the matrix is rebuilt with `from_cols_slice`.
macro_rules! impl_glam_mat_f64 {
    ($mat:ty, $len:expr) => {
        impl_glam_ty!(
            $mat,
            $len,
            f64,
            DataType::Float64,
            Float64Builder,
            |value: &$mat| *value,
            |elements: Vec<f64>| Self::from_cols_slice(&elements)
        );
    };
}

// Trait implementations for the supported `glam` types.
//
// The second macro argument is the number of scalar elements stored per value; it becomes
// the length of the `FixedSizeList` Arrow data type. Vectors pass their lane count, matrices
// pass their total element count (e.g. 4 for `Mat2`, 16 for `Mat4`).

// Boolean vectors
impl_glam_vec_bool!(glam::BVec2, 2);
impl_glam_vec_bool!(glam::BVec3, 3);
impl_glam_vec_bool!(glam::BVec4, 4);

// Float32 vectors and matrices
// (matrices are rebuilt from the flat element list via `from_cols_slice`)
impl_glam_vec_f32!(glam::Vec2, 2);
impl_glam_vec_f32!(glam::Vec3, 3);
impl_glam_vec_f32!(glam::Vec4, 4);
impl_glam_mat_f32!(glam::Mat2, 4);
impl_glam_mat_f32!(glam::Mat3, 9);
impl_glam_mat_f32!(glam::Mat4, 16);

// Float64 vectors and matrices
// (matrices are rebuilt from the flat element list via `from_cols_slice`)
impl_glam_vec_f64!(glam::DVec2, 2);
impl_glam_vec_f64!(glam::DVec3, 3);
impl_glam_vec_f64!(glam::DVec4, 4);
impl_glam_mat_f64!(glam::DMat2, 4);
impl_glam_mat_f64!(glam::DMat3, 9);
impl_glam_mat_f64!(glam::DMat4, 16);
8 changes: 8 additions & 0 deletions arrow_convert/src/features/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Optional integrations with third-party crates. Each submodule is compiled only when the
// Cargo feature of the same name is enabled.

// Arrow support for `tinystr` types.
#[cfg(feature = "tinystr")]
mod tinystr;

// Arrow support for `rust_decimal` types.
#[cfg(feature = "rust_decimal")]
mod rust_decimal;

// Arrow support for `glam` types.
#[cfg(feature = "glam")]
mod glam;
55 changes: 55 additions & 0 deletions arrow_convert/src/features/rust_decimal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use crate::arrow_enable_vec_for_type;
use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;

use arrow::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE};
use rust_decimal::Decimal;

use arrow::array::{Decimal128Array, Decimal128Builder};

/// Maps `rust_decimal::Decimal` onto Arrow's `Decimal128` using the maximum 128-bit precision
/// and Arrow's default decimal scale.
impl ArrowField for Decimal {
    type Type = Self;

    #[inline]
    fn data_type() -> DataType {
        let precision = DECIMAL128_MAX_PRECISION;
        let scale = DECIMAL_DEFAULT_SCALE;
        DataType::Decimal128(precision, scale)
    }
}

// Enables the crate's `Vec` support for `Decimal`.
arrow_enable_vec_for_type!(Decimal);

impl ArrowSerialize for Decimal {
type ArrayBuilderType = Decimal128Builder;

fn new_array() -> Self::ArrayBuilderType {
Decimal128Builder::new().with_data_type(Self::data_type())
}

fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
array.append_value(decimal_to_scaled_i128(*v));
Ok(())
}
}

impl ArrowDeserialize for Decimal {
type ArrayType = Decimal128Array;

fn arrow_deserialize(v: Option<i128>) -> Option<Decimal> {
v.map(|d| Decimal::from_i128_with_scale(d, DECIMAL_DEFAULT_SCALE as _))
}
}

/// Converts a `Decimal` value to an `i128` representation, adjusting the scale to match the default scale.
fn decimal_to_scaled_i128(decimal: Decimal) -> i128 {
let m = decimal.mantissa();
let scale_diff = DECIMAL_DEFAULT_SCALE as i32 - decimal.scale() as i32;

if scale_diff == 0 {
m
} else if scale_diff < 0 {
m / 10_i128.pow(scale_diff.unsigned_abs())
} else {
m * 10_i128.pow(scale_diff as u32)
}
}
37 changes: 37 additions & 0 deletions arrow_convert/src/features/tinystr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use arrow::datatypes::DataType;
use tinystr::TinyAsciiStr;

use crate::deserialize::ArrowDeserialize;
use crate::field::ArrowField;
use crate::serialize::ArrowSerialize;

use arrow::array::{FixedSizeBinaryArray, FixedSizeBinaryBuilder};

/// Maps `TinyAsciiStr<N>` onto an Arrow `FixedSizeBinary` column that is `N` bytes wide.
impl<const N: usize> ArrowField for TinyAsciiStr<N> {
    type Type = Self;

    fn data_type() -> DataType {
        let byte_width = N as i32;
        DataType::FixedSizeBinary(byte_width)
    }
}

impl<const N: usize> ArrowSerialize for TinyAsciiStr<N> {
    type ArrayBuilderType = FixedSizeBinaryBuilder;

    fn new_array() -> Self::ArrayBuilderType {
        FixedSizeBinaryBuilder::new(N as i32)
    }

    /// Appends the string as exactly `N` bytes, NUL-padded on the right.
    ///
    /// `as_bytes()` would yield only the used bytes, which the fixed-width builder rejects for
    /// any string shorter than `N`; `all_bytes()` always yields the full `N`-byte buffer.
    fn arrow_serialize(v: &Self::Type, array: &mut Self::ArrayBuilderType) -> arrow::error::Result<()> {
        array.append_value(&v.all_bytes()[..])?;
        Ok(())
    }
}

impl<const N: usize> ArrowDeserialize for TinyAsciiStr<N> {
    type ArrayType = FixedSizeBinaryArray;

    /// Rebuilds the string from a fixed-width slot, ignoring trailing NUL padding.
    ///
    /// Returns `None` for a null slot or when the remaining bytes are rejected by
    /// `TinyAsciiStr::from_bytes`.
    fn arrow_deserialize(v: Option<&[u8]>) -> Option<Self> {
        let bytes = v?;
        // Length of the content once the right-hand NUL padding is removed.
        let used = bytes.iter().rposition(|&b| b != 0).map_or(0, |last| last + 1);
        TinyAsciiStr::from_bytes(&bytes[..used]).ok()
    }
}
2 changes: 2 additions & 0 deletions arrow_convert/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,5 @@ pub use arrow_convert_derive::{ArrowDeserialize, ArrowField, ArrowSerialize};
#[cfg_attr(not(target_os = "windows"), doc = include_str!("../README.md"))]
#[cfg(doctest)]
struct ReadmeDoctests;

mod features;
Loading

0 comments on commit ce01e61

Please sign in to comment.