diff --git a/arrow-arith/src/arity.rs b/arrow-arith/src/arity.rs
index ff5c8e822cc0..d85ee67d062b 100644
--- a/arrow-arith/src/arity.rs
+++ b/arrow-arith/src/arity.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
-//! Defines kernels suitable to perform operations to primitive arrays.
+//! Kernels for operating on [`PrimitiveArray`]s
use arrow_array::builder::BufferBuilder;
use arrow_array::types::ArrowDictionaryKeyType;
@@ -162,18 +162,38 @@ where
}
}
+/// Applies a binary infallible function to two [`PrimitiveArray`]s,
+/// producing a new [`PrimitiveArray`]
+///
+/// # Details
+///
/// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in `0..len`, collecting
-/// the results in a [`PrimitiveArray`]. If any index is null in either `a` or `b`, the
+/// the results in a [`PrimitiveArray`].
+///
+/// If any index is null in either `a` or `b`, the
/// corresponding index in the result will also be null
///
-/// Like [`unary`] the provided function is evaluated for every index, ignoring validity. This
-/// is beneficial when the cost of the operation is low compared to the cost of branching, and
-/// especially when the operation can be vectorised, however, requires `op` to be infallible
-/// for all possible values of its inputs
+/// Like [`unary`], the `op` is evaluated for every element in the two arrays,
+/// including those elements which are NULL. This is beneficial when the cost of
+/// the operation is low compared to the cost of branching, and especially when
+/// the operation can be vectorised; however, it requires `op` to be infallible
+/// for all possible values of its inputs
///
-/// # Error
+/// # Errors
+///
+/// * if the arrays have different lengths.
///
-/// This function gives error if the arrays have different lengths
+/// # Example
+/// ```
+/// # use arrow_arith::arity::binary;
+/// # use arrow_array::{Float32Array, Int32Array};
+/// # use arrow_array::types::Int32Type;
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8), Some(7.2)]);
+/// let b = Int32Array::from(vec![1, 2, 4, 9]);
+/// // compute int(a) + b for each element
+/// let c = binary(&a, &b, |a, b| a as i32 + b).unwrap();
+/// assert_eq!(c, Int32Array::from(vec![Some(6), None, Some(10), Some(16)]));
+/// ```
pub fn binary(
a: &PrimitiveArray,
b: &PrimitiveArray,
@@ -207,23 +227,68 @@ where
Ok(PrimitiveArray::new(buffer.into(), nulls))
}
-/// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in `0..len`, mutating
-/// the mutable [`PrimitiveArray`] `a`. If any index is null in either `a` or `b`, the
-/// corresponding index in the result will also be null.
+/// Applies a binary and infallible function to values in two arrays, replacing
+/// the values in the first array in place.
///
-/// Mutable primitive array means that the buffer is not shared with other arrays.
-/// As a result, this mutates the buffer directly without allocating new buffer.
+/// # Details
+///
+/// Given two arrays of length `len`, calls `op(a[i], b[i])` for `i` in
+/// `0..len`, modifying the [`PrimitiveArray`] `a` in place, if possible.
+///
+/// If any index is null in either `a` or `b`, the corresponding index in the
+/// result will also be null.
+///
+/// # Buffer Reuse
+///
+/// If the underlying buffers in `a` are not shared with other arrays, mutates
+/// the underlying buffer in place, without allocating.
+///
+/// If the underlying buffers in `a` are shared, returns `Err(a)`.
///
/// Like [`unary`] the provided function is evaluated for every index, ignoring validity. This
/// is beneficial when the cost of the operation is low compared to the cost of branching, and
/// especially when the operation can be vectorised, however, requires `op` to be infallible
/// for all possible values of its inputs
///
-/// # Error
+/// # Errors
///
-/// This function gives error if the arrays have different lengths.
-/// This function gives error of original [`PrimitiveArray`] `a` if it is not a mutable
-/// primitive array.
+/// * If the arrays have different lengths
+/// * If `a` is shared and cannot be modified in place (returns `Err(a)`)
+///
+/// # See Also
+///
+/// * Documentation on [`PrimitiveArray::unary_mut`] for operating on [`ArrayRef`].
+///
+/// # Example
+/// ```
+/// # use arrow_arith::arity::binary_mut;
+/// # use arrow_array::Float32Array;
+/// # use arrow_array::types::Int32Type;
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8)]);
+/// let b = Float32Array::from(vec![Some(1.0f32), None, Some(2.0)]);
+/// // compute a + b, updating the value in a in place if possible
+/// let a = binary_mut(a, &b, |a, b| a + b).unwrap().unwrap();
+/// assert_eq!(a, Float32Array::from(vec![Some(6.1), None, Some(8.8)]));
+/// ```
+///
+/// # Example with shared buffers
+/// ```
+/// # use arrow_arith::arity::binary_mut;
+/// # use arrow_array::Float32Array;
+/// # use arrow_array::types::Int32Type;
+/// let a = Float32Array::from(vec![Some(5.1f32), None, Some(6.8)]);
+/// let b = Float32Array::from(vec![Some(1.0f32), None, Some(2.0)]);
+/// // a_cloned shares the buffer with a
+/// let a_cloned = a.clone();
+/// // try to update a in place, but it is shared. Returns Err(a)
+/// let a = binary_mut(a, &b, |a, b| a + b).unwrap_err();
+/// assert_eq!(a_cloned, a);
+/// // drop shared reference
+/// drop(a_cloned);
+/// // now a is not shared, so we can update it in place
+/// let a = binary_mut(a, &b, |a, b| a + b).unwrap().unwrap();
+/// assert_eq!(a, Float32Array::from(vec![Some(6.1), None, Some(8.8)]));
+/// ```
pub fn binary_mut(
a: PrimitiveArray,
b: &PrimitiveArray,
diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs
index 919a1010116b..933f19518c65 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -419,7 +419,7 @@ pub type Decimal256Array = PrimitiveArray;
pub use crate::types::ArrowPrimitiveType;
-/// An array of [primitive values](https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout)
+/// An array of primitive values, of type [`ArrowPrimitiveType`]
///
/// # Example: From a Vec
///
@@ -480,6 +480,19 @@ pub use crate::types::ArrowPrimitiveType;
/// assert_eq!(array.values(), &[1, 0, 2]);
/// assert!(array.is_null(1));
/// ```
+///
+/// # Example: Get a `PrimitiveArray` from an [`ArrayRef`]
+/// ```
+/// # use std::sync::Arc;
+/// # use arrow_array::{Array, cast::AsArray, ArrayRef, Float32Array, PrimitiveArray};
+/// # use arrow_array::types::{Float32Type};
+/// # use arrow_schema::DataType;
+/// # let array: ArrayRef = Arc::new(Float32Array::from(vec![1.2, 2.3]));
+/// // will panic if the array is not a Float32Array
+/// assert_eq!(&DataType::Float32, array.data_type());
+/// let f32_array: Float32Array = array.as_primitive().clone();
+/// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3]));
+/// ```
pub struct PrimitiveArray {
data_type: DataType,
/// Values data
@@ -732,22 +745,34 @@ impl PrimitiveArray {
PrimitiveArray::from(unsafe { d.build_unchecked() })
}
- /// Applies an unary and infallible function to a primitive array.
- /// This is the fastest way to perform an operation on a primitive array when
- /// the benefits of a vectorized operation outweigh the cost of branching nulls and non-nulls.
+ /// Applies a unary infallible function to a primitive array, producing a
+ /// new array of potentially different type.
+ ///
+ /// This is the fastest way to perform an operation on a primitive array
+ /// when the benefits of a vectorized operation outweigh the cost of
+ /// branching nulls and non-nulls.
///
- /// # Implementation
+ /// See also
+ /// * [`Self::unary_mut`] for in place modification.
+ /// * [`Self::try_unary`] for fallible operations.
+ /// * [`arrow::compute::binary`] for binary operations
+ ///
+ /// [`arrow::compute::binary`]: https://docs.rs/arrow/latest/arrow/compute/fn.binary.html
+ /// # Null Handling
+ ///
+ /// Applies the function for all values, including those on null slots. This
+ /// will often allow the compiler to generate faster vectorized code, but
+ /// requires that the operation must be infallible (not error/panic) for any
+ /// value of the corresponding type or this function may panic.
///
- /// This will apply the function for all values, including those on null slots.
- /// This implies that the operation must be infallible for any value of the corresponding type
- /// or this function may panic.
/// # Example
/// ```rust
- /// # use arrow_array::{Int32Array, types::Int32Type};
+ /// # use arrow_array::{Int32Array, Float32Array, types::Int32Type};
/// # fn main() {
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
- /// let c = array.unary(|x| x * 2 + 1);
- /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
+ /// // Create a new array with the value of applying sqrt
+ /// let c = array.unary(|x| f32::sqrt(x as f32));
+ /// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), None]));
/// # }
/// ```
pub fn unary(&self, op: F) -> PrimitiveArray
@@ -766,24 +791,50 @@ impl PrimitiveArray {
PrimitiveArray::new(buffer.into(), nulls)
}
- /// Applies an unary and infallible function to a mutable primitive array.
- /// Mutable primitive array means that the buffer is not shared with other arrays.
- /// As a result, this mutates the buffer directly without allocating new buffer.
+ /// Applies a unary and infallible function to the array in place if possible.
+ ///
+ /// # Buffer Reuse
+ ///
+ /// If the underlying buffers are not shared with other arrays, mutates the
+ /// underlying buffer in place, without allocating.
+ ///
+ /// If the underlying buffer is shared, returns `Err(self)`.
///
- /// # Implementation
+ /// # Null Handling
+ ///
+ /// See [`Self::unary`] for more information on null handling.
///
- /// This will apply the function for all values, including those on null slots.
- /// This implies that the operation must be infallible for any value of the corresponding type
- /// or this function may panic.
/// # Example
+ ///
/// ```rust
/// # use arrow_array::{Int32Array, types::Int32Type};
- /// # fn main() {
/// let array = Int32Array::from(vec![Some(5), Some(7), None]);
+ /// // Apply x*2+1 to the data in place, no allocations
/// let c = array.unary_mut(|x| x * 2 + 1).unwrap();
/// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
- /// # }
/// ```
+ ///
+ /// # Example: modify [`ArrayRef`] in place, if not shared
+ ///
+ /// It is also possible to modify an [`ArrayRef`] if there are no other
+ /// references to the underlying buffer.
+ ///
+ /// ```rust
+ /// # use std::sync::Arc;
+ /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Int32Array, PrimitiveArray, types::Int32Type};
+ /// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), Some(7), None]));
+ /// // Convert to Int32Array (panics if array.data_type is not Int32)
+ /// let a = array.as_primitive::<Int32Type>().clone();
+ /// // Try to apply x*2+1 to the data in place, fails because array is still shared
+ /// a.unary_mut(|x| x * 2 + 1).unwrap_err();
+ /// // Try again, this time dropping the last remaining reference
+ /// let a = array.as_primitive::<Int32Type>().clone();
+ /// drop(array);
+ /// // Now we can apply the operation in place
+ /// let c = a.unary_mut(|x| x * 2 + 1).unwrap();
+ /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None]));
+ /// ```
+
pub fn unary_mut(self, op: F) -> Result, PrimitiveArray>
where
F: Fn(T::Native) -> T::Native,
@@ -796,11 +847,12 @@ impl PrimitiveArray {
Ok(builder.finish())
}
- /// Applies a unary and fallible function to all valid values in a primitive array
+ /// Applies a unary fallible function to all valid values in a primitive
+ /// array, producing a new array of potentially different type.
///
- /// This is unlike [`Self::unary`] which will apply an infallible function to all rows
- /// regardless of validity, in many cases this will be significantly faster and should
- /// be preferred if `op` is infallible.
+ /// Applies `op` to only rows that are valid, which is often significantly
+ /// slower than [`Self::unary`], which should be preferred if `op` is
+ /// infallible.
///
/// Note: LLVM is currently unable to effectively vectorize fallible operations
pub fn try_unary(&self, op: F) -> Result, E>
@@ -829,13 +881,16 @@ impl PrimitiveArray {
Ok(PrimitiveArray::new(values, nulls))
}
- /// Applies an unary and fallible function to all valid values in a mutable primitive array.
- /// Mutable primitive array means that the buffer is not shared with other arrays.
- /// As a result, this mutates the buffer directly without allocating new buffer.
+ /// Applies a unary fallible function to all valid values in a mutable
+ /// primitive array.
+ ///
+ /// # Null Handling
+ ///
+ /// See [`Self::try_unary`] for more information on null handling.
+ ///
+ /// # Buffer Reuse
///
- /// This is unlike [`Self::unary_mut`] which will apply an infallible function to all rows
- /// regardless of validity, in many cases this will be significantly faster and should
- /// be preferred if `op` is infallible.
+ /// See [`Self::unary_mut`] for more information on buffer reuse.
///
/// This returns an `Err` when the input array is shared buffer with other
/// array. In the case, returned `Err` wraps input array. If the function
@@ -870,9 +925,9 @@ impl PrimitiveArray {
/// Applies a unary and nullable function to all valid values in a primitive array
///
- /// This is unlike [`Self::unary`] which will apply an infallible function to all rows
- /// regardless of validity, in many cases this will be significantly faster and should
- /// be preferred if `op` is infallible.
+ /// Applies `op` to only rows that are valid, which is often significantly
+ /// slower than [`Self::unary`], which should be preferred if `op` is
+ /// infallible.
///
/// Note: LLVM is currently unable to effectively vectorize fallible operations
pub fn unary_opt(&self, op: F) -> PrimitiveArray
@@ -915,8 +970,16 @@ impl PrimitiveArray {
PrimitiveArray::new(values, Some(nulls))
}
- /// Returns `PrimitiveBuilder` of this primitive array for mutating its values if the underlying
- /// data buffer is not shared by others.
+ /// Returns a `PrimitiveBuilder` for this array, suitable for mutating values
+ /// in place.
+ ///
+ /// # Buffer Reuse
+ ///
+ /// If the underlying data buffer has no other outstanding references, the
+ /// buffer is used without copying.
+ ///
+ /// If the underlying data buffer does have outstanding references, returns
+ /// `Err(self)`
pub fn into_builder(self) -> Result, Self> {
let len = self.len();
let data = self.into_data();
diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs
index 198a11cb6974..a790fba86fed 100644
--- a/arrow-array/src/types.rs
+++ b/arrow-array/src/types.rs
@@ -47,9 +47,11 @@ impl BooleanType {
pub const DATA_TYPE: DataType = DataType::Boolean;
}
-/// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the
-/// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
+/// Trait for [primitive values], bridging the dynamic-typed nature of Arrow
+/// (via [`DataType`]) with the static-typed nature of rust types
+/// ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`].
///
+/// [primitive values]: https://arrow.apache.org/docs/format/Columnar.html#fixed-size-primitive-layout
/// [`ArrowNativeType`]: arrow_buffer::ArrowNativeType
pub trait ArrowPrimitiveType: primitive::PrimitiveTypeSealed + 'static {
/// Corresponding Rust native type for the primitive type.
diff --git a/arrow-buffer/src/native.rs b/arrow-buffer/src/native.rs
index e05c1311ff3c..c563f73cf5b9 100644
--- a/arrow-buffer/src/native.rs
+++ b/arrow-buffer/src/native.rs
@@ -22,11 +22,14 @@ mod private {
pub trait Sealed {}
}
-/// Trait expressing a Rust type that has the same in-memory representation
-/// as Arrow. This includes `i16`, `f32`, but excludes `bool` (which in arrow is represented in bits).
+/// Trait expressing a Rust type that has the same in-memory representation as
+/// Arrow.
///
-/// In little endian machines, types that implement [`ArrowNativeType`] can be memcopied to arrow buffers
-/// as is.
+/// This includes `i16`, `f32`, but excludes `bool` (which in arrow is
+/// represented in bits).
+///
+/// In little endian machines, types that implement [`ArrowNativeType`] can be
+/// memcopied to arrow buffers as is.
///
/// # Transmute Safety
///