From de6a7594fb7faab800c82f149fb277d015690e6b Mon Sep 17 00:00:00 2001 From: ByteBaker <42913098+ByteBaker@users.noreply.github.com> Date: Tue, 24 Sep 2024 01:26:35 +0530 Subject: [PATCH] chore: add docs, part of #37 (#6433) * chore: add docs, part of #37 - add pragma `#![warn(missing_docs)]` to the following - `arrow-array` - `arrow-cast` - `arrow-csv` - `arrow-data` - `arrow-json` - `arrow-ord` - `arrow-pyarrow-integration-testing` - `arrow-row` - `arrow-schema` - `arrow-select` - `arrow-string` - `arrow` - `parquet_derive` - add docs to those that generated lint warnings - Remove `bitflags` workaround in `arrow-schema` At some point, a change in `bitflags v2.3.0` had started generating lint warnings in `arrow-schema`, This was handled using a [workaround](https://github.com/apache/arrow-rs/pull/4233) [Issue](https://github.com/bitflags/bitflags/issues/356) `bitflags v2.3.1` fixed the issue hence the workaround is no longer needed. * fix: resolve comments on PR #6433 --- .../src/builder/generic_bytes_builder.rs | 2 +- .../src/builder/generic_bytes_view_builder.rs | 12 +++-- arrow-cast/src/cast/mod.rs | 2 +- arrow-csv/src/writer.rs | 5 +- arrow-data/src/byte_view.rs | 1 + arrow-data/src/data.rs | 41 ++++++++++---- arrow-data/src/decimal.rs | 4 ++ arrow-data/src/lib.rs | 1 + arrow-data/src/transform/mod.rs | 5 ++ arrow-json/src/reader/mod.rs | 2 +- arrow-ord/src/lib.rs | 1 + arrow-ord/src/rank.rs | 2 + arrow-ord/src/sort.rs | 2 + arrow-pyarrow-integration-testing/src/lib.rs | 1 + arrow-row/src/lib.rs | 1 + arrow-schema/src/datatype.rs | 4 +- arrow-schema/src/error.rs | 16 ++++++ arrow-schema/src/ffi.rs | 54 ++++++++++++++----- arrow-schema/src/lib.rs | 1 + arrow-schema/src/schema.rs | 1 + arrow-select/src/filter.rs | 1 + arrow-select/src/interleave.rs | 2 + arrow-select/src/lib.rs | 1 + arrow-select/src/nullif.rs | 2 + arrow-select/src/zip.rs | 2 + arrow-string/src/concat_elements.rs | 6 +++ arrow-string/src/lib.rs | 1 + arrow-string/src/like.rs | 2 + arrow/tests/csv.rs | 3 +- parquet_derive/src/lib.rs | 4 ++ 30 files changed, 147 insertions(+), 35 deletions(-) diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index e85a2fed01e7..a465f3e4d60e 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -537,7 +537,7 @@ mod tests { write!(builder, "buz").unwrap(); builder.append_value(""); let a = builder.finish(); - let r: Vec<_> = a.iter().map(|x| x.unwrap()).collect(); + let r: Vec<_> = a.iter().flatten().collect(); assert_eq!(r, &["foo", "bar\n", "fizbuz"]) } } diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index 09277c679c16..d12c2b7db468 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -555,6 +555,8 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 { #[cfg(test)] mod tests { + use core::str; + use super::*; use crate::Array; @@ -642,7 +644,7 @@ mod tests { let array = v.finish_cloned(); array.to_data().validate_full().unwrap(); assert_eq!(array.data_buffers().len(), 5); - let actual: Vec<_> = array.iter().map(Option::unwrap).collect(); + let actual: Vec<_> = array.iter().flatten().collect(); assert_eq!( actual, &[ @@ -692,13 +694,13 @@ mod tests { let mut exp_builder = StringViewBuilder::new(); let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE); - let 
long_string = String::from_utf8(vec![b'a'; STARTING_BLOCK_SIZE as usize]).unwrap(); + let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap(); for i in 0..9 { // 8k, 16k, 32k, 64k, 128k, 256k, 512k, 1M, 2M for _ in 0..(2_u32.pow(i)) { - exp_builder.append_value(&long_string); - fixed_builder.append_value(&long_string); + exp_builder.append_value(long_string); + fixed_builder.append_value(long_string); } exp_builder.flush_in_progress(); fixed_builder.flush_in_progress(); @@ -721,7 +723,7 @@ mod tests { } // Add one more value, and the buffer stop growing. - exp_builder.append_value(&long_string); + exp_builder.append_value(long_string); exp_builder.flush_in_progress(); assert_eq!( exp_builder.completed.last().unwrap().capacity(), diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 25ef243e18e4..b751c81ee440 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -9394,7 +9394,7 @@ mod tests { Some(vec![Some(0), None, Some(2)]), ]); let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap(); - let r: Vec<_> = a.as_string::().iter().map(|x| x.unwrap()).collect(); + let r: Vec<_> = a.as_string::().iter().flatten().collect(); assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]); } #[test] diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index dded475aa2aa..eae2133a4623 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -442,6 +442,7 @@ mod tests { }; use arrow_array::types::*; use arrow_buffer::i256; + use core::str; use std::io::{Cursor, Read, Seek}; use std::sync::Arc; @@ -508,7 +509,7 @@ Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo "#; - assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap()); + assert_eq!(expected, str::from_utf8(&buffer).unwrap()); } #[test] @@ -558,7 +559,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo , 0.290472,0.290472 "#; - assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap()); + assert_eq!(expected, str::from_utf8(&buffer).unwrap()); } #[test] diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs index a2e9d135fdcb..6f6d6d175689 100644 --- a/arrow-data/src/byte_view.rs +++ b/arrow-data/src/byte_view.rs @@ -40,6 +40,7 @@ pub struct ByteView { impl ByteView { #[inline(always)] + /// Convert `ByteView` to `u128` by concatenating the fields pub fn as_u128(self) -> u128 { (self.length as u128) | ((self.prefix as u128) << 32) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 33cbc897a6c1..8c9e002e219b 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -231,6 +231,7 @@ pub struct ArrayData { nulls: Option, } +/// A thread-safe, shared reference to the Arrow array data. pub type ArrayDataRef = Arc; impl ArrayData { @@ -1747,7 +1748,12 @@ pub enum BufferSpec { /// for array slicing and interoperability with `Vec`, which cannot be over-aligned. /// /// Note that these alignment requirements will vary between architectures - FixedWidth { byte_width: usize, alignment: usize }, + FixedWidth { + /// The width of each element in bytes + byte_width: usize, + /// The alignment required by Rust for an array of the corresponding primitive + alignment: usize, + }, /// Variable width, such as string data for utf8 data VariableWidth, /// Buffer holds a bitmap. 
@@ -1783,6 +1789,7 @@ pub struct ArrayDataBuilder { impl ArrayDataBuilder { #[inline] + /// Creates a new array data builder pub const fn new(data_type: DataType) -> Self { Self { data_type, @@ -1796,17 +1803,20 @@ impl ArrayDataBuilder { } } + /// Creates a new array data builder from an existing one, changing the data type pub fn data_type(self, data_type: DataType) -> Self { Self { data_type, ..self } } #[inline] #[allow(clippy::len_without_is_empty)] + /// Sets the length of the [ArrayData] pub const fn len(mut self, n: usize) -> Self { self.len = n; self } + /// Sets the null buffer of the [ArrayData] pub fn nulls(mut self, nulls: Option) -> Self { self.nulls = nulls; self.null_count = None; @@ -1814,43 +1824,51 @@ impl ArrayDataBuilder { self } + /// Sets the null count of the [ArrayData] pub fn null_count(mut self, null_count: usize) -> Self { self.null_count = Some(null_count); self } + /// Sets the `null_bit_buffer` of the [ArrayData] pub fn null_bit_buffer(mut self, buf: Option) -> Self { self.nulls = None; self.null_bit_buffer = buf; self } + /// Sets the offset of the [ArrayData] #[inline] pub const fn offset(mut self, n: usize) -> Self { self.offset = n; self } + /// Sets the buffers of the [ArrayData] pub fn buffers(mut self, v: Vec) -> Self { self.buffers = v; self } + /// Adds a single buffer to the [ArrayData]'s buffers pub fn add_buffer(mut self, b: Buffer) -> Self { self.buffers.push(b); self } - pub fn add_buffers(mut self, bs: Vec) -> Self { + /// Adds multiple buffers to the [ArrayData]'s buffers + pub fn add_buffers>(mut self, bs: I) -> Self { self.buffers.extend(bs); self } + /// Sets the child data of the [ArrayData] pub fn child_data(mut self, v: Vec) -> Self { self.child_data = v; self } + /// Adds a single child data to the [ArrayData]'s child data pub fn add_child_data(mut self, r: ArrayData) -> Self { self.child_data.push(r); self @@ -1873,14 +1891,17 @@ impl ArrayDataBuilder { /// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag unsafe fn build_impl(self) -> ArrayData { - let nulls = self.nulls.or_else(|| { - let buffer = self.null_bit_buffer?; - let buffer = BooleanBuffer::new(buffer, self.offset, self.len); - Some(match self.null_count { - Some(n) => NullBuffer::new_unchecked(buffer, n), - None => NullBuffer::new(buffer), + let nulls = self + .nulls + .or_else(|| { + let buffer = self.null_bit_buffer?; + let buffer = BooleanBuffer::new(buffer, self.offset, self.len); + Some(match self.null_count { + Some(n) => NullBuffer::new_unchecked(buffer, n), + None => NullBuffer::new(buffer), + }) }) - }); + .filter(|b| b.null_count() != 0); ArrayData { data_type: self.data_type, @@ -1888,7 +1909,7 @@ impl ArrayDataBuilder { offset: self.offset, buffers: self.buffers, child_data: self.child_data, - nulls: nulls.filter(|b| b.null_count() != 0), + nulls, } } diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs index d9028591aaaa..fe19db641236 100644 --- a/arrow-data/src/decimal.rs +++ b/arrow-data/src/decimal.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions. +//! +//! Also provides functions to validate if a given decimal value is within the valid range of the decimal type. 
+ use arrow_buffer::i256; use arrow_schema::ArrowError; diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs index 59a049fe96cf..a7feca6cd976 100644 --- a/arrow-data/src/lib.rs +++ b/arrow-data/src/lib.rs @@ -19,6 +19,7 @@ //! //! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array) +#![warn(missing_docs)] mod data; pub use data::*; diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index edc68f1a107f..c74b0c43481a 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -15,6 +15,11 @@ // specific language governing permissions and limitations // under the License. +//! Low-level array data abstractions. +//! +//! Provides utilities for creating, manipulating, and converting Arrow arrays +//! made of primitive types, strings, and nested types. + use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView}; use crate::bit_mask::set_bits; use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index 97d9c8962618..bcacf6f706b8 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -1007,7 +1007,7 @@ mod tests { let map_values = map.values().as_list::(); assert_eq!(map.value_offsets(), &[0, 1, 3, 5]); - let k: Vec<_> = map_keys.iter().map(|x| x.unwrap()).collect(); + let k: Vec<_> = map_keys.iter().flatten().collect(); assert_eq!(&k, &["a", "a", "b", "c", "a"]); let list_values = map_values.values().as_string::(); diff --git a/arrow-ord/src/lib.rs b/arrow-ord/src/lib.rs index 8fe4ecbc05aa..93fcb98a00f5 100644 --- a/arrow-ord/src/lib.rs +++ b/arrow-ord/src/lib.rs @@ -43,6 +43,7 @@ //! ``` //! +#![warn(missing_docs)] pub mod cmp; #[doc(hidden)] pub mod comparison; diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs index 51b0b5b91ba9..ecc693bab4e4 100644 --- a/arrow-ord/src/rank.rs +++ b/arrow-ord/src/rank.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Provides `rank` function to assign a rank to each value in an array + use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray}; diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index 168f82747c91..60fc4a918525 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -635,7 +635,9 @@ where /// One column to be used in lexicographical sort #[derive(Clone, Debug)] pub struct SortColumn { + /// The column to sort pub values: ArrayRef, + /// Sort options for this column pub options: Option, } diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 918fa74e3083..e12c1389e66f 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -18,6 +18,7 @@ //! This library demonstrates a minimal usage of Rust's C data interface to pass //! arrays from and to Python. +#![warn(missing_docs)] use std::sync::Arc; use arrow::array::new_empty_array; diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 2d9af757550e..ac859e90e126 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -125,6 +125,7 @@ //! [compared]: PartialOrd //! 
[compare]: PartialOrd +#![warn(missing_docs)] use std::cmp::Ordering; use std::hash::{Hash, Hasher}; use std::sync::Arc; diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index b9cfc3d8a848..32eb95aef8a4 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -420,11 +420,13 @@ pub enum IntervalUnit { MonthDayNano, } -// Sparse or Dense union layouts +/// Sparse or Dense union layouts #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum UnionMode { + /// Sparse union layout Sparse, + /// Dense union layout Dense, } diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs index 5e632d051f0f..982dd026a04d 100644 --- a/arrow-schema/src/error.rs +++ b/arrow-schema/src/error.rs @@ -26,23 +26,39 @@ use std::error::Error; pub enum ArrowError { /// Returned when functionality is not yet available. NotYetImplemented(String), + /// Wraps an external error. ExternalError(Box), + /// Error during casting from one type to another. CastError(String), + /// Memory or buffer error. MemoryError(String), + /// Error during parsing from a string. ParseError(String), + /// Error during schema-related operations. SchemaError(String), + /// Error during computation. ComputeError(String), + /// Error during division by zero. DivideByZero, + /// Error when an arithmetic operation overflows. ArithmeticOverflow(String), + /// Error during CSV-related operations. CsvError(String), + /// Error during JSON-related operations. JsonError(String), + /// Error during IO operations. IoError(String, std::io::Error), + /// Error during IPC operations in `arrow-ipc` or `arrow-flight`. IpcError(String), + /// Error indicating that an unexpected or bad argument was passed to a function. InvalidArgumentError(String), + /// Error during Parquet operations. ParquetError(String), /// Error during import or export to/from the C Data Interface CDataInterface(String), + /// Error when a dictionary key is bigger than the key type DictionaryKeyOverflowError, + /// Error when the run end index in a REE array is bigger than the array length RunEndIndexOverflowError, } diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index c68849059f8c..e12c37da4898 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -37,25 +37,27 @@ use crate::{ ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode, }; +use bitflags::bitflags; use std::sync::Arc; use std::{ collections::HashMap, ffi::{c_char, c_void, CStr, CString}, }; -#[allow(clippy::assign_op_pattern)] -/// Workaround -mod flags { - use bitflags::bitflags; - bitflags! { - pub struct Flags: i64 { - const DICTIONARY_ORDERED = 0b00000001; - const NULLABLE = 0b00000010; - const MAP_KEYS_SORTED = 0b00000100; - } +bitflags! { + /// Flags for [`FFI_ArrowSchema`] + /// + /// Old Workaround at + /// is no longer required as `bitflags` [fixed the issue](https://github.com/bitflags/bitflags/pull/355). 
+ pub struct Flags: i64 { + /// Indicates that the dictionary is ordered + const DICTIONARY_ORDERED = 0b00000001; + /// Indicates that the field is nullable + const NULLABLE = 0b00000010; + /// Indicates that the map keys are sorted + const MAP_KEYS_SORTED = 0b00000100; } } -pub use flags::*; /// ABI-compatible struct for `ArrowSchema` from C Data Interface /// See @@ -70,10 +72,12 @@ pub use flags::*; /// #[repr(C)] #[derive(Debug)] +#[allow(non_camel_case_types)] pub struct FFI_ArrowSchema { format: *const c_char, name: *const c_char, metadata: *const c_char, + /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) flags: i64, n_children: i64, children: *mut *mut FFI_ArrowSchema, @@ -155,16 +159,19 @@ impl FFI_ArrowSchema { Ok(this) } + /// Set the name of the schema pub fn with_name(mut self, name: &str) -> Result { self.name = CString::new(name).unwrap().into_raw(); Ok(self) } + /// Set the flags of the schema pub fn with_flags(mut self, flags: Flags) -> Result { self.flags = flags.bits(); Ok(self) } + /// Add metadata to the schema pub fn with_metadata(mut self, metadata: I) -> Result where I: IntoIterator, @@ -237,6 +244,7 @@ impl FFI_ArrowSchema { std::ptr::replace(schema, Self::empty()) } + /// Create an empty [`FFI_ArrowSchema`] pub fn empty() -> Self { Self { format: std::ptr::null_mut(), @@ -251,7 +259,7 @@ impl FFI_ArrowSchema { } } - /// returns the format of this schema. + /// Returns the format of this schema. pub fn format(&self) -> &str { assert!(!self.format.is_null()); // safe because the lifetime of `self.format` equals `self` @@ -260,7 +268,7 @@ impl FFI_ArrowSchema { .expect("The external API has a non-utf8 as format") } - /// returns the name of this schema. + /// Returns the name of this schema. pub fn name(&self) -> Option<&str> { if self.name.is_null() { None @@ -274,35 +282,55 @@ impl FFI_ArrowSchema { } } + /// Returns the flags of this schema. pub fn flags(&self) -> Option { Flags::from_bits(self.flags) } + /// Returns the child of this schema at `index`. + /// + /// # Panics + /// + /// Panics if `index` is greater than or equal to the number of children. + /// + /// This is to make sure that the unsafe acces to raw pointer is sound. pub fn child(&self, index: usize) -> &Self { assert!(index < self.n_children as usize); unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() } } + /// Returns an iterator to the schema's children. pub fn children(&self) -> impl Iterator { (0..self.n_children as usize).map(move |i| self.child(i)) } + /// Returns if the field is semantically nullable, + /// regardless of whether it actually has null values. pub fn nullable(&self) -> bool { (self.flags / 2) & 1 == 1 } + /// Returns the reference to the underlying dictionary of the schema. + /// Check [ArrowSchema.dictionary](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.dictionary). + /// + /// This must be `Some` if the schema represents a dictionary-encoded type, `None` otherwise. pub fn dictionary(&self) -> Option<&Self> { unsafe { self.dictionary.as_ref() } } + /// For map types, returns whether the keys within each map value are sorted. + /// + /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) pub fn map_keys_sorted(&self) -> bool { self.flags & 0b00000100 != 0 } + /// For dictionary-encoded types, returns whether the ordering of dictionary indices is semantically meaningful. 
pub fn dictionary_ordered(&self) -> bool { self.flags & 0b00000001 != 0 } + /// Returns the metadata in the schema as `Key-Value` pairs pub fn metadata(&self) -> Result, ArrowError> { if self.metadata.is_null() { Ok(HashMap::new()) diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs index 27466593d598..d513ca7dd526 100644 --- a/arrow-schema/src/lib.rs +++ b/arrow-schema/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow logical types mod datatype; diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 9a9ef45d8b24..cc3a8a308a83 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -185,6 +185,7 @@ pub type SchemaRef = Arc; #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Schema { + /// A sequence of fields that describe the schema. pub fields: Fields, /// A map of key-value pairs containing additional meta data. pub metadata: HashMap, diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index e59ad50dd3f9..8198980cb97a 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -56,6 +56,7 @@ const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8; pub struct SlicesIterator<'a>(BitSliceIterator<'a>); impl<'a> SlicesIterator<'a> { + /// Creates a new iterator from a [BooleanArray] pub fn new(filter: &'a BooleanArray) -> Self { Self(filter.values().set_slices()) } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index a2ab7ba63359..a0520e969a6b 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Interleave elements from multiple arrays + use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values}; use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; use arrow_array::cast::AsArray; diff --git a/arrow-select/src/lib.rs b/arrow-select/src/lib.rs index 82f57a6af42b..b796821717fe 100644 --- a/arrow-select/src/lib.rs +++ b/arrow-select/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow selection kernels pub mod concat; diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index a7848c16a8ec..d1e3c35bfbde 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Implements the `nullif` function for Arrow arrays. + use arrow_array::{make_array, Array, ArrayRef, BooleanArray}; use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper}; use arrow_buffer::{BooleanBuffer, NullBuffer}; diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index ff2380ef2420..1f317dfd45c6 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Zip two arrays by some boolean mask. 
Where the mask evaluates `true` values of `truthy` + use crate::filter::SlicesIterator; use arrow_array::*; use arrow_data::transform::MutableArrayData; diff --git a/arrow-string/src/concat_elements.rs b/arrow-string/src/concat_elements.rs index cb60363d3324..41be8a81cb12 100644 --- a/arrow-string/src/concat_elements.rs +++ b/arrow-string/src/concat_elements.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +//! Provides utility functions for concatenation of elements in arrays. use std::sync::Arc; use arrow_array::builder::BufferBuilder; @@ -167,6 +168,11 @@ pub fn concat_elements_utf8_many( Ok(unsafe { builder.build_unchecked() }.into()) } +/// Returns the elementwise concatenation of [`Array`]s. +/// +/// # Errors +/// +/// This function errors if the arrays are of different types. pub fn concat_elements_dyn(left: &dyn Array, right: &dyn Array) -> Result { if left.data_type() != right.data_type() { return Err(ArrowError::ComputeError(format!( diff --git a/arrow-string/src/lib.rs b/arrow-string/src/lib.rs index 4444b37a7742..87e23a7b22ae 100644 --- a/arrow-string/src/lib.rs +++ b/arrow-string/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow string kernels pub mod concat_elements; diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index 4a6c5bab90e6..6d82682d724c 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Provide SQL's LIKE operators for Arrow's string arrays + use crate::predicate::Predicate; use arrow_array::cast::AsArray; use arrow_array::*; diff --git a/arrow/tests/csv.rs b/arrow/tests/csv.rs index fd01f1663955..781b750f34cd 100644 --- a/arrow/tests/csv.rs +++ b/arrow/tests/csv.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use core::str; use std::sync::Arc; use arrow_array::*; @@ -54,6 +55,6 @@ fn test_export_csv_timestamps() { let left = "c1,c2 2019-04-18T20:54:47.378+10:00,2019-04-18T10:54:47.378 2021-10-30T17:59:07+11:00,2021-10-30T06:59:07\n"; - let right = String::from_utf8(sw).unwrap(); + let right = str::from_utf8(&sw).unwrap(); assert_eq!(left, right); } diff --git a/parquet_derive/src/lib.rs b/parquet_derive/src/lib.rs index 038d8fa446e5..a35cebb3a105 100644 --- a/parquet_derive/src/lib.rs +++ b/parquet_derive/src/lib.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! This crate provides a procedural macro to derive +//! implementations of a RecordWriter and RecordReader + +#![warn(missing_docs)] #![recursion_limit = "128"] extern crate proc_macro;
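
For readers unfamiliar with the lint this patch enables across the crates, here is a minimal sketch (not part of the patch; the crate and item names below are made up) of what `#![warn(missing_docs)]` enforces, and why the diff adds `///` comments to public fields, methods, and enum variants rather than only to top-level items:

    // lib.rs of a hypothetical crate -- illustrates the crate-level lint
    // that this patch adds to arrow-array, arrow-schema, etc.
    #![warn(missing_docs)]

    //! Example crate-level documentation (the `//!` docs the lint expects at the root).

    /// A documented public struct: no lint warning is emitted for this item.
    pub struct Documented {
        /// Public fields need their own doc comments as well.
        pub value: i64,
    }

    /// Enum variants are checked individually, which is why the diff documents
    /// `UnionMode::Sparse` and `UnionMode::Dense` separately.
    pub enum Mode {
        /// A documented variant.
        Quiet,
        Loud, // warning: missing documentation for a variant
    }

    pub fn undocumented() {} // warning: missing documentation for a function

Because the attribute is `warn` rather than `deny`, existing builds keep compiling; the warnings simply surface every public item that still lacks documentation, which is how the doc comments added throughout this diff were identified.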