Skip to content

Commit

Permalink
Revert "refactor(rust): Purge arrow-rs support (pola-rs#19312)"
Browse files Browse the repository at this point in the history
This reverts commit f88bd6a.
  • Loading branch information
jhorstmann committed Oct 23, 2024
1 parent 1d144c8 commit 75a6063
Show file tree
Hide file tree
Showing 37 changed files with 1,178 additions and 3 deletions.
118 changes: 118 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ repository = "https://github.com/pola-rs/polars"
ahash = ">=0.8.5"
aho-corasick = "1.1"
arboard = { version = "3.4.0", default-features = false }
arrow-array = { version = ">=41", default-features = false }
arrow-buffer = { version = ">=41", default-features = false }
arrow-data = { version = ">=41", default-features = false }
arrow-schema = { version = ">=41", default-features = false }
atoi = "2"
atoi_simd = "0.15.5"
atomic-waker = "1"
Expand Down
8 changes: 7 additions & 1 deletion crates/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ multiversion = { workspace = true, optional = true }
# Faster hashing
ahash = { workspace = true }

# For async arrow flight conversion
# Support conversion to/from arrow-rs
arrow-array = { workspace = true, optional = true }
arrow-buffer = { workspace = true, optional = true }
arrow-data = { workspace = true, optional = true }
arrow-schema = { workspace = true, optional = true }
async-stream = { version = "0.3", optional = true }
tokio = { workspace = true, optional = true, features = ["io-util"] }

Expand Down Expand Up @@ -99,6 +103,7 @@ getrandom = { version = "0.2", features = ["js"] }
[features]
default = []
full = [
"arrow_rs",
"io_ipc",
"io_flight",
"io_ipc_compression",
Expand All @@ -111,6 +116,7 @@ full = [
# parses timezones used in timestamp conversions
"chrono-tz",
]
arrow_rs = ["arrow-buffer", "arrow-schema", "arrow-data", "arrow-array"]
io_ipc = ["arrow-format", "polars-error/arrow-format"]
io_ipc_compression = ["lz4", "zstd", "io_ipc"]
io_flight = ["io_ipc", "arrow-format/flight-data", "async-stream", "futures", "tokio"]
Expand Down
43 changes: 43 additions & 0 deletions crates/polars-arrow/src/array/binary/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
use arrow_data::{ArrayData, ArrayDataBuilder};

use crate::array::{Arrow2Arrow, BinaryArray};
use crate::bitmap::Bitmap;
use crate::offset::{Offset, OffsetsBuffer};

impl<O: Offset> Arrow2Arrow for BinaryArray<O> {
    /// Exports this array as arrow-rs [`ArrayData`].
    ///
    /// The resulting data carries two buffers, the offsets followed by the values,
    /// and uses the validity bitmap (when there is one) as its null buffer.
    fn to_data(&self) -> ArrayData {
        let offsets_buffer = self.offsets.clone().into_inner().into();
        let values_buffer = self.values.clone().into();
        let null_buffer = self.validity.as_ref().map(|bitmap| bitmap.clone().into());

        let builder = ArrayDataBuilder::new(self.dtype.clone().into())
            .len(self.offsets().len_proxy())
            .buffers(vec![offsets_buffer, values_buffer])
            .nulls(null_buffer);

        // SAFETY: Array is valid, so the data assembled from its parts is valid too
        unsafe { builder.build_unchecked() }
    }

    /// Builds a [`BinaryArray`] from arrow-rs [`ArrayData`].
    ///
    /// Buffer 0 is read as the offsets and buffer 1 as the values. The offsets are
    /// narrowed to the `data.len() + 1` entries starting at `data.offset()`; the
    /// values buffer is taken over as a whole.
    ///
    /// # Panics
    ///
    /// Indexing panics if non-empty `data` holds fewer than two buffers.
    fn from_data(data: &ArrayData) -> Self {
        let dtype = data.data_type().clone().into();

        // Empty data is answered right away, so its offsets buffer is never read.
        if data.is_empty() {
            return Self::new_empty(dtype);
        }

        let buffers = data.buffers();
        let raw_offsets = buffers[0].clone().into();

        // SAFETY: ArrayData is valid
        let mut offsets = unsafe { OffsetsBuffer::new_unchecked(raw_offsets) };
        // One more offset than elements is needed to delimit the last value.
        offsets.slice(data.offset(), data.len() + 1);

        let values = buffers[1].clone().into();
        let validity = data.nulls().cloned().map(Bitmap::from_null_buffer);

        Self {
            dtype,
            offsets,
            values,
            validity,
        }
    }
}
3 changes: 3 additions & 0 deletions crates/polars-arrow/src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ mod mutable;
pub use mutable::*;
use polars_error::{polars_bail, PolarsResult};

#[cfg(feature = "arrow_rs")]
mod data;

/// A [`BinaryArray`] is Arrow's semantically equivalent of an immutable `Vec<Option<Vec<u8>>>`.
/// It implements [`Array`].
///
Expand Down
36 changes: 36 additions & 0 deletions crates/polars-arrow/src/array/boolean/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
use arrow_buffer::{BooleanBuffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};

use crate::array::{Arrow2Arrow, BooleanArray};
use crate::bitmap::Bitmap;
use crate::datatypes::ArrowDataType;

impl Arrow2Arrow for BooleanArray {
    /// Exports this array as arrow-rs [`ArrayData`] of type `Boolean`.
    ///
    /// The values bitmap is routed through a [`NullBuffer`]; its length, bit offset
    /// and underlying buffer become the length, offset and single buffer of the result.
    fn to_data(&self) -> ArrayData {
        let bits = NullBuffer::from(self.values.clone());
        let len = bits.len();
        let offset = bits.offset();
        let raw_bits = bits.into_inner().into_inner();
        let null_buffer = self.validity.as_ref().map(|bitmap| bitmap.clone().into());

        let builder = ArrayDataBuilder::new(arrow_schema::DataType::Boolean)
            .len(len)
            .offset(offset)
            .buffers(vec![raw_bits])
            .nulls(null_buffer);

        // SAFETY: Array is valid
        unsafe { builder.build_unchecked() }
    }

    /// Builds a [`BooleanArray`] from arrow-rs [`ArrayData`].
    ///
    /// # Panics
    ///
    /// Panics if the data type of `data` is not `Boolean`; indexing panics if `data`
    /// has no buffer.
    fn from_data(data: &ArrayData) -> Self {
        assert_eq!(data.data_type(), &arrow_schema::DataType::Boolean);

        let bits = BooleanBuffer::new(data.buffers()[0].clone(), data.offset(), data.len());
        // Use NullBuffer to compute set count
        let values = Bitmap::from_null_buffer(NullBuffer::new(bits));
        let validity = data.nulls().cloned().map(Bitmap::from_null_buffer);

        Self {
            dtype: ArrowDataType::Boolean,
            values,
            validity,
        }
    }
}
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use crate::bitmap::{Bitmap, MutableBitmap};
use crate::datatypes::{ArrowDataType, PhysicalType};
use crate::trusted_len::TrustedLen;

#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
mod from;
Expand Down
49 changes: 49 additions & 0 deletions crates/polars-arrow/src/array/dictionary/data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use arrow_data::{ArrayData, ArrayDataBuilder};

use crate::array::{
from_data, to_data, Arrow2Arrow, DictionaryArray, DictionaryKey, PrimitiveArray,
};
use crate::datatypes::{ArrowDataType, PhysicalType};

impl<K: DictionaryKey> Arrow2Arrow for DictionaryArray<K> {
    /// Exports this dictionary as arrow-rs [`ArrayData`].
    ///
    /// The exported keys serve as the base: their data is turned back into a builder,
    /// retyped to the dictionary type, and given the exported values as only child.
    fn to_data(&self) -> ArrayData {
        let key_data = self.keys.to_data();
        let dictionary_type = self.dtype.clone().into();
        let value_data = to_data(self.values.as_ref());

        let builder = key_data
            .into_builder()
            .data_type(dictionary_type)
            .child_data(vec![value_data]);

        // SAFETY: Dictionary is valid
        unsafe { builder.build_unchecked() }
    }

    /// Builds a [`DictionaryArray`] from arrow-rs [`ArrayData`].
    ///
    /// The keys are rebuilt from buffer 0 together with the offset, length and nulls
    /// of `data`; the values are converted from the first child.
    ///
    /// # Panics
    ///
    /// Panics if `data` is not of a dictionary type, or if its physical type is not
    /// a dictionary keyed by `K`. Indexing panics if the buffer or child is missing.
    fn from_data(data: &ArrayData) -> Self {
        let key_type = match data.data_type() {
            arrow_schema::DataType::Dictionary(k, _) => k.as_ref(),
            d => panic!("unsupported dictionary type {d}"),
        };

        let dtype = ArrowDataType::from(data.data_type().clone());
        assert_eq!(
            dtype.to_physical_type(),
            PhysicalType::Dictionary(K::KEY_TYPE)
        );

        // The keys are a primitive array sharing the buffer, window and nulls of `data`.
        let key_builder = ArrayDataBuilder::new(key_type.clone())
            .len(data.len())
            .offset(data.offset())
            .buffers(vec![data.buffers()[0].clone()])
            .nulls(data.nulls().cloned());

        // SAFETY: Dictionary is valid
        let key_data = unsafe { key_builder.build_unchecked() };

        Self {
            dtype,
            keys: PrimitiveArray::from_data(&key_data),
            values: from_data(&data.child_data()[0]),
        }
    }
}
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/array/dictionary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use crate::scalar::{new_scalar, Scalar};
use crate::trusted_len::TrustedLen;
use crate::types::NativeType;

#[cfg(feature = "arrow_rs")]
mod data;
mod ffi;
pub(super) mod fmt;
mod iterator;
Expand Down
Loading

0 comments on commit 75a6063

Please sign in to comment.