From de6a7594fb7faab800c82f149fb277d015690e6b Mon Sep 17 00:00:00 2001 From: ByteBaker <42913098+ByteBaker@users.noreply.github.com> Date: Tue, 24 Sep 2024 01:26:35 +0530 Subject: [PATCH] chore: add docs, part of #37 (#6433) * chore: add docs, part of #37 - add pragma `#![warn(missing_docs)]` to the following - `arrow-array` - `arrow-cast` - `arrow-csv` - `arrow-data` - `arrow-json` - `arrow-ord` - `arrow-pyarrow-integration-testing` - `arrow-row` - `arrow-schema` - `arrow-select` - `arrow-string` - `arrow` - `parquet_derive` - add docs to those that generated lint warnings - Remove `bitflags` workaround in `arrow-schema` At some point, a change in `bitflags v2.3.0` had started generating lint warnings in `arrow-schema`, This was handled using a [workaround](https://github.com/apache/arrow-rs/pull/4233) [Issue](https://github.com/bitflags/bitflags/issues/356) `bitflags v2.3.1` fixed the issue hence the workaround is no longer needed. * fix: resolve comments on PR #6433 --- .../src/builder/generic_bytes_builder.rs | 2 +- .../src/builder/generic_bytes_view_builder.rs | 12 +++-- arrow-cast/src/cast/mod.rs | 2 +- arrow-csv/src/writer.rs | 5 +- arrow-data/src/byte_view.rs | 1 + arrow-data/src/data.rs | 41 ++++++++++---- arrow-data/src/decimal.rs | 4 ++ arrow-data/src/lib.rs | 1 + arrow-data/src/transform/mod.rs | 5 ++ arrow-json/src/reader/mod.rs | 2 +- arrow-ord/src/lib.rs | 1 + arrow-ord/src/rank.rs | 2 + arrow-ord/src/sort.rs | 2 + arrow-pyarrow-integration-testing/src/lib.rs | 1 + arrow-row/src/lib.rs | 1 + arrow-schema/src/datatype.rs | 4 +- arrow-schema/src/error.rs | 16 ++++++ arrow-schema/src/ffi.rs | 54 ++++++++++++++----- arrow-schema/src/lib.rs | 1 + arrow-schema/src/schema.rs | 1 + arrow-select/src/filter.rs | 1 + arrow-select/src/interleave.rs | 2 + arrow-select/src/lib.rs | 1 + arrow-select/src/nullif.rs | 2 + arrow-select/src/zip.rs | 2 + arrow-string/src/concat_elements.rs | 6 +++ arrow-string/src/lib.rs | 1 + arrow-string/src/like.rs | 2 + arrow/tests/csv.rs | 3 +- parquet_derive/src/lib.rs | 4 ++ 30 files changed, 147 insertions(+), 35 deletions(-) diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index e85a2fed01e7..a465f3e4d60e 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -537,7 +537,7 @@ mod tests { write!(builder, "buz").unwrap(); builder.append_value(""); let a = builder.finish(); - let r: Vec<_> = a.iter().map(|x| x.unwrap()).collect(); + let r: Vec<_> = a.iter().flatten().collect(); assert_eq!(r, &["foo", "bar\n", "fizbuz"]) } } diff --git a/arrow-array/src/builder/generic_bytes_view_builder.rs b/arrow-array/src/builder/generic_bytes_view_builder.rs index 09277c679c16..d12c2b7db468 100644 --- a/arrow-array/src/builder/generic_bytes_view_builder.rs +++ b/arrow-array/src/builder/generic_bytes_view_builder.rs @@ -555,6 +555,8 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 { #[cfg(test)] mod tests { + use core::str; + use super::*; use crate::Array; @@ -642,7 +644,7 @@ mod tests { let array = v.finish_cloned(); array.to_data().validate_full().unwrap(); assert_eq!(array.data_buffers().len(), 5); - let actual: Vec<_> = array.iter().map(Option::unwrap).collect(); + let actual: Vec<_> = array.iter().flatten().collect(); assert_eq!( actual, &[ @@ -692,13 +694,13 @@ mod tests { let mut exp_builder = StringViewBuilder::new(); let mut fixed_builder = StringViewBuilder::new().with_fixed_block_size(STARTING_BLOCK_SIZE); - let 
long_string = String::from_utf8(vec![b'a'; STARTING_BLOCK_SIZE as usize]).unwrap(); + let long_string = str::from_utf8(&[b'a'; STARTING_BLOCK_SIZE as usize]).unwrap(); for i in 0..9 { // 8k, 16k, 32k, 64k, 128k, 256k, 512k, 1M, 2M for _ in 0..(2_u32.pow(i)) { - exp_builder.append_value(&long_string); - fixed_builder.append_value(&long_string); + exp_builder.append_value(long_string); + fixed_builder.append_value(long_string); } exp_builder.flush_in_progress(); fixed_builder.flush_in_progress(); @@ -721,7 +723,7 @@ mod tests { } // Add one more value, and the buffer stop growing. - exp_builder.append_value(&long_string); + exp_builder.append_value(long_string); exp_builder.flush_in_progress(); assert_eq!( exp_builder.completed.last().unwrap().capacity(), diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs index 25ef243e18e4..b751c81ee440 100644 --- a/arrow-cast/src/cast/mod.rs +++ b/arrow-cast/src/cast/mod.rs @@ -9394,7 +9394,7 @@ mod tests { Some(vec![Some(0), None, Some(2)]), ]); let a = cast_with_options(&array, &DataType::Utf8, &options).unwrap(); - let r: Vec<_> = a.as_string::().iter().map(|x| x.unwrap()).collect(); + let r: Vec<_> = a.as_string::().iter().flatten().collect(); assert_eq!(r, &["[0, 1, 2]", "[0, null, 2]"]); } #[test] diff --git a/arrow-csv/src/writer.rs b/arrow-csv/src/writer.rs index dded475aa2aa..eae2133a4623 100644 --- a/arrow-csv/src/writer.rs +++ b/arrow-csv/src/writer.rs @@ -442,6 +442,7 @@ mod tests { }; use arrow_array::types::*; use arrow_buffer::i256; + use core::str; use std::io::{Cursor, Read, Seek}; use std::sync::Arc; @@ -508,7 +509,7 @@ Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,cupcakes consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378,06:51:20,cupcakes sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo "#; - assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap()); + assert_eq!(expected, str::from_utf8(&buffer).unwrap()); } #[test] @@ -558,7 +559,7 @@ sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555,23:46:03,foo , 0.290472,0.290472 "#; - assert_eq!(expected.to_string(), String::from_utf8(buffer).unwrap()); + assert_eq!(expected, str::from_utf8(&buffer).unwrap()); } #[test] diff --git a/arrow-data/src/byte_view.rs b/arrow-data/src/byte_view.rs index a2e9d135fdcb..6f6d6d175689 100644 --- a/arrow-data/src/byte_view.rs +++ b/arrow-data/src/byte_view.rs @@ -40,6 +40,7 @@ pub struct ByteView { impl ByteView { #[inline(always)] + /// Convert `ByteView` to `u128` by concatenating the fields pub fn as_u128(self) -> u128 { (self.length as u128) | ((self.prefix as u128) << 32) diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 33cbc897a6c1..8c9e002e219b 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ -231,6 +231,7 @@ pub struct ArrayData { nulls: Option, } +/// A thread-safe, shared reference to the Arrow array data. pub type ArrayDataRef = Arc; impl ArrayData { @@ -1747,7 +1748,12 @@ pub enum BufferSpec { /// for array slicing and interoperability with `Vec`, which cannot be over-aligned. /// /// Note that these alignment requirements will vary between architectures - FixedWidth { byte_width: usize, alignment: usize }, + FixedWidth { + /// The width of each element in bytes + byte_width: usize, + /// The alignment required by Rust for an array of the corresponding primitive + alignment: usize, + }, /// Variable width, such as string data for utf8 data VariableWidth, /// Buffer holds a bitmap. 
@@ -1783,6 +1789,7 @@ pub struct ArrayDataBuilder { impl ArrayDataBuilder { #[inline] + /// Creates a new array data builder pub const fn new(data_type: DataType) -> Self { Self { data_type, @@ -1796,17 +1803,20 @@ impl ArrayDataBuilder { } } + /// Creates a new array data builder from an existing one, changing the data type pub fn data_type(self, data_type: DataType) -> Self { Self { data_type, ..self } } #[inline] #[allow(clippy::len_without_is_empty)] + /// Sets the length of the [ArrayData] pub const fn len(mut self, n: usize) -> Self { self.len = n; self } + /// Sets the null buffer of the [ArrayData] pub fn nulls(mut self, nulls: Option) -> Self { self.nulls = nulls; self.null_count = None; @@ -1814,43 +1824,51 @@ impl ArrayDataBuilder { self } + /// Sets the null count of the [ArrayData] pub fn null_count(mut self, null_count: usize) -> Self { self.null_count = Some(null_count); self } + /// Sets the `null_bit_buffer` of the [ArrayData] pub fn null_bit_buffer(mut self, buf: Option) -> Self { self.nulls = None; self.null_bit_buffer = buf; self } + /// Sets the offset of the [ArrayData] #[inline] pub const fn offset(mut self, n: usize) -> Self { self.offset = n; self } + /// Sets the buffers of the [ArrayData] pub fn buffers(mut self, v: Vec) -> Self { self.buffers = v; self } + /// Adds a single buffer to the [ArrayData]'s buffers pub fn add_buffer(mut self, b: Buffer) -> Self { self.buffers.push(b); self } - pub fn add_buffers(mut self, bs: Vec) -> Self { + /// Adds multiple buffers to the [ArrayData]'s buffers + pub fn add_buffers>(mut self, bs: I) -> Self { self.buffers.extend(bs); self } + /// Sets the child data of the [ArrayData] pub fn child_data(mut self, v: Vec) -> Self { self.child_data = v; self } + /// Adds a single child data to the [ArrayData]'s child data pub fn add_child_data(mut self, r: ArrayData) -> Self { self.child_data.push(r); self @@ -1873,14 +1891,17 @@ impl ArrayDataBuilder { /// Same as [`Self::build_unchecked`] but ignoring `force_validate` feature flag unsafe fn build_impl(self) -> ArrayData { - let nulls = self.nulls.or_else(|| { - let buffer = self.null_bit_buffer?; - let buffer = BooleanBuffer::new(buffer, self.offset, self.len); - Some(match self.null_count { - Some(n) => NullBuffer::new_unchecked(buffer, n), - None => NullBuffer::new(buffer), + let nulls = self + .nulls + .or_else(|| { + let buffer = self.null_bit_buffer?; + let buffer = BooleanBuffer::new(buffer, self.offset, self.len); + Some(match self.null_count { + Some(n) => NullBuffer::new_unchecked(buffer, n), + None => NullBuffer::new(buffer), + }) }) - }); + .filter(|b| b.null_count() != 0); ArrayData { data_type: self.data_type, @@ -1888,7 +1909,7 @@ impl ArrayDataBuilder { offset: self.offset, buffers: self.buffers, child_data: self.child_data, - nulls: nulls.filter(|b| b.null_count() != 0), + nulls, } } diff --git a/arrow-data/src/decimal.rs b/arrow-data/src/decimal.rs index d9028591aaaa..fe19db641236 100644 --- a/arrow-data/src/decimal.rs +++ b/arrow-data/src/decimal.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! Defines maximum and minimum values for `decimal256` and `decimal128` types for varying precisions. +//! +//! Also provides functions to validate if a given decimal value is within the valid range of the decimal type. 
+ use arrow_buffer::i256; use arrow_schema::ArrowError; diff --git a/arrow-data/src/lib.rs b/arrow-data/src/lib.rs index 59a049fe96cf..a7feca6cd976 100644 --- a/arrow-data/src/lib.rs +++ b/arrow-data/src/lib.rs @@ -19,6 +19,7 @@ //! //! For a higher-level, strongly-typed interface see [arrow_array](https://docs.rs/arrow_array) +#![warn(missing_docs)] mod data; pub use data::*; diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs index edc68f1a107f..c74b0c43481a 100644 --- a/arrow-data/src/transform/mod.rs +++ b/arrow-data/src/transform/mod.rs @@ -15,6 +15,11 @@ // specific language governing permissions and limitations // under the License. +//! Low-level array data abstractions. +//! +//! Provides utilities for creating, manipulating, and converting Arrow arrays +//! made of primitive types, strings, and nested types. + use super::{data::new_buffers, ArrayData, ArrayDataBuilder, ByteView}; use crate::bit_mask::set_bits; use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index 97d9c8962618..bcacf6f706b8 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -1007,7 +1007,7 @@ mod tests { let map_values = map.values().as_list::(); assert_eq!(map.value_offsets(), &[0, 1, 3, 5]); - let k: Vec<_> = map_keys.iter().map(|x| x.unwrap()).collect(); + let k: Vec<_> = map_keys.iter().flatten().collect(); assert_eq!(&k, &["a", "a", "b", "c", "a"]); let list_values = map_values.values().as_string::(); diff --git a/arrow-ord/src/lib.rs b/arrow-ord/src/lib.rs index 8fe4ecbc05aa..93fcb98a00f5 100644 --- a/arrow-ord/src/lib.rs +++ b/arrow-ord/src/lib.rs @@ -43,6 +43,7 @@ //! ``` //! +#![warn(missing_docs)] pub mod cmp; #[doc(hidden)] pub mod comparison; diff --git a/arrow-ord/src/rank.rs b/arrow-ord/src/rank.rs index 51b0b5b91ba9..ecc693bab4e4 100644 --- a/arrow-ord/src/rank.rs +++ b/arrow-ord/src/rank.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Provides `rank` function to assign a rank to each value in an array + use arrow_array::cast::AsArray; use arrow_array::types::*; use arrow_array::{downcast_primitive_array, Array, ArrowNativeTypeOp, GenericByteArray}; diff --git a/arrow-ord/src/sort.rs b/arrow-ord/src/sort.rs index 168f82747c91..60fc4a918525 100644 --- a/arrow-ord/src/sort.rs +++ b/arrow-ord/src/sort.rs @@ -635,7 +635,9 @@ where /// One column to be used in lexicographical sort #[derive(Clone, Debug)] pub struct SortColumn { + /// The column to sort pub values: ArrayRef, + /// Sort options for this column pub options: Option, } diff --git a/arrow-pyarrow-integration-testing/src/lib.rs b/arrow-pyarrow-integration-testing/src/lib.rs index 918fa74e3083..e12c1389e66f 100644 --- a/arrow-pyarrow-integration-testing/src/lib.rs +++ b/arrow-pyarrow-integration-testing/src/lib.rs @@ -18,6 +18,7 @@ //! This library demonstrates a minimal usage of Rust's C data interface to pass //! arrays from and to Python. +#![warn(missing_docs)] use std::sync::Arc; use arrow::array::new_empty_array; diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 2d9af757550e..ac859e90e126 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -125,6 +125,7 @@ //! [compared]: PartialOrd //! 
[compare]: PartialOrd +#![warn(missing_docs)] use std::cmp::Ordering; use std::hash::{Hash, Hasher}; use std::sync::Arc; diff --git a/arrow-schema/src/datatype.rs b/arrow-schema/src/datatype.rs index b9cfc3d8a848..32eb95aef8a4 100644 --- a/arrow-schema/src/datatype.rs +++ b/arrow-schema/src/datatype.rs @@ -420,11 +420,13 @@ pub enum IntervalUnit { MonthDayNano, } -// Sparse or Dense union layouts +/// Sparse or Dense union layouts #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum UnionMode { + /// Sparse union layout Sparse, + /// Dense union layout Dense, } diff --git a/arrow-schema/src/error.rs b/arrow-schema/src/error.rs index 5e632d051f0f..982dd026a04d 100644 --- a/arrow-schema/src/error.rs +++ b/arrow-schema/src/error.rs @@ -26,23 +26,39 @@ use std::error::Error; pub enum ArrowError { /// Returned when functionality is not yet available. NotYetImplemented(String), + /// Wraps an external error. ExternalError(Box), + /// Error during casting from one type to another. CastError(String), + /// Memory or buffer error. MemoryError(String), + /// Error during parsing from a string. ParseError(String), + /// Error during schema-related operations. SchemaError(String), + /// Error during computation. ComputeError(String), + /// Error during division by zero. DivideByZero, + /// Error when an arithmetic operation overflows. ArithmeticOverflow(String), + /// Error during CSV-related operations. CsvError(String), + /// Error during JSON-related operations. JsonError(String), + /// Error during IO operations. IoError(String, std::io::Error), + /// Error during IPC operations in `arrow-ipc` or `arrow-flight`. IpcError(String), + /// Error indicating that an unexpected or bad argument was passed to a function. InvalidArgumentError(String), + /// Error during Parquet operations. ParquetError(String), /// Error during import or export to/from the C Data Interface CDataInterface(String), + /// Error when a dictionary key is bigger than the key type DictionaryKeyOverflowError, + /// Error when the run end index in a REE array is bigger than the array length RunEndIndexOverflowError, } diff --git a/arrow-schema/src/ffi.rs b/arrow-schema/src/ffi.rs index c68849059f8c..e12c37da4898 100644 --- a/arrow-schema/src/ffi.rs +++ b/arrow-schema/src/ffi.rs @@ -37,25 +37,27 @@ use crate::{ ArrowError, DataType, Field, FieldRef, IntervalUnit, Schema, TimeUnit, UnionFields, UnionMode, }; +use bitflags::bitflags; use std::sync::Arc; use std::{ collections::HashMap, ffi::{c_char, c_void, CStr, CString}, }; -#[allow(clippy::assign_op_pattern)] -/// Workaround -mod flags { - use bitflags::bitflags; - bitflags! { - pub struct Flags: i64 { - const DICTIONARY_ORDERED = 0b00000001; - const NULLABLE = 0b00000010; - const MAP_KEYS_SORTED = 0b00000100; - } +bitflags! { + /// Flags for [`FFI_ArrowSchema`] + /// + /// Old Workaround at + /// is no longer required as `bitflags` [fixed the issue](https://github.com/bitflags/bitflags/pull/355). 
+ pub struct Flags: i64 { + /// Indicates that the dictionary is ordered + const DICTIONARY_ORDERED = 0b00000001; + /// Indicates that the field is nullable + const NULLABLE = 0b00000010; + /// Indicates that the map keys are sorted + const MAP_KEYS_SORTED = 0b00000100; } } -pub use flags::*; /// ABI-compatible struct for `ArrowSchema` from C Data Interface /// See @@ -70,10 +72,12 @@ pub use flags::*; /// #[repr(C)] #[derive(Debug)] +#[allow(non_camel_case_types)] pub struct FFI_ArrowSchema { format: *const c_char, name: *const c_char, metadata: *const c_char, + /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) flags: i64, n_children: i64, children: *mut *mut FFI_ArrowSchema, @@ -155,16 +159,19 @@ impl FFI_ArrowSchema { Ok(this) } + /// Set the name of the schema pub fn with_name(mut self, name: &str) -> Result { self.name = CString::new(name).unwrap().into_raw(); Ok(self) } + /// Set the flags of the schema pub fn with_flags(mut self, flags: Flags) -> Result { self.flags = flags.bits(); Ok(self) } + /// Add metadata to the schema pub fn with_metadata(mut self, metadata: I) -> Result where I: IntoIterator, @@ -237,6 +244,7 @@ impl FFI_ArrowSchema { std::ptr::replace(schema, Self::empty()) } + /// Create an empty [`FFI_ArrowSchema`] pub fn empty() -> Self { Self { format: std::ptr::null_mut(), @@ -251,7 +259,7 @@ impl FFI_ArrowSchema { } } - /// returns the format of this schema. + /// Returns the format of this schema. pub fn format(&self) -> &str { assert!(!self.format.is_null()); // safe because the lifetime of `self.format` equals `self` @@ -260,7 +268,7 @@ impl FFI_ArrowSchema { .expect("The external API has a non-utf8 as format") } - /// returns the name of this schema. + /// Returns the name of this schema. pub fn name(&self) -> Option<&str> { if self.name.is_null() { None @@ -274,35 +282,55 @@ impl FFI_ArrowSchema { } } + /// Returns the flags of this schema. pub fn flags(&self) -> Option { Flags::from_bits(self.flags) } + /// Returns the child of this schema at `index`. + /// + /// # Panics + /// + /// Panics if `index` is greater than or equal to the number of children. + /// + /// This is to make sure that the unsafe acces to raw pointer is sound. pub fn child(&self, index: usize) -> &Self { assert!(index < self.n_children as usize); unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() } } + /// Returns an iterator to the schema's children. pub fn children(&self) -> impl Iterator { (0..self.n_children as usize).map(move |i| self.child(i)) } + /// Returns if the field is semantically nullable, + /// regardless of whether it actually has null values. pub fn nullable(&self) -> bool { (self.flags / 2) & 1 == 1 } + /// Returns the reference to the underlying dictionary of the schema. + /// Check [ArrowSchema.dictionary](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.dictionary). + /// + /// This must be `Some` if the schema represents a dictionary-encoded type, `None` otherwise. pub fn dictionary(&self) -> Option<&Self> { unsafe { self.dictionary.as_ref() } } + /// For map types, returns whether the keys within each map value are sorted. + /// + /// Refer to [Arrow Flags](https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema.flags) pub fn map_keys_sorted(&self) -> bool { self.flags & 0b00000100 != 0 } + /// For dictionary-encoded types, returns whether the ordering of dictionary indices is semantically meaningful. 
pub fn dictionary_ordered(&self) -> bool { self.flags & 0b00000001 != 0 } + /// Returns the metadata in the schema as `Key-Value` pairs pub fn metadata(&self) -> Result, ArrowError> { if self.metadata.is_null() { Ok(HashMap::new()) diff --git a/arrow-schema/src/lib.rs b/arrow-schema/src/lib.rs index 27466593d598..d513ca7dd526 100644 --- a/arrow-schema/src/lib.rs +++ b/arrow-schema/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow logical types mod datatype; diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 9a9ef45d8b24..cc3a8a308a83 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -185,6 +185,7 @@ pub type SchemaRef = Arc; #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Schema { + /// A sequence of fields that describe the schema. pub fields: Fields, /// A map of key-value pairs containing additional meta data. pub metadata: HashMap, diff --git a/arrow-select/src/filter.rs b/arrow-select/src/filter.rs index e59ad50dd3f9..8198980cb97a 100644 --- a/arrow-select/src/filter.rs +++ b/arrow-select/src/filter.rs @@ -56,6 +56,7 @@ const FILTER_SLICES_SELECTIVITY_THRESHOLD: f64 = 0.8; pub struct SlicesIterator<'a>(BitSliceIterator<'a>); impl<'a> SlicesIterator<'a> { + /// Creates a new iterator from a [BooleanArray] pub fn new(filter: &'a BooleanArray) -> Self { Self(filter.values().set_slices()) } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index a2ab7ba63359..a0520e969a6b 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Interleave elements from multiple arrays + use crate::dictionary::{merge_dictionary_values, should_merge_dictionary_values}; use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; use arrow_array::cast::AsArray; diff --git a/arrow-select/src/lib.rs b/arrow-select/src/lib.rs index 82f57a6af42b..b796821717fe 100644 --- a/arrow-select/src/lib.rs +++ b/arrow-select/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow selection kernels pub mod concat; diff --git a/arrow-select/src/nullif.rs b/arrow-select/src/nullif.rs index a7848c16a8ec..d1e3c35bfbde 100644 --- a/arrow-select/src/nullif.rs +++ b/arrow-select/src/nullif.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Implements the `nullif` function for Arrow arrays. + use arrow_array::{make_array, Array, ArrayRef, BooleanArray}; use arrow_buffer::buffer::{bitwise_bin_op_helper, bitwise_unary_op_helper}; use arrow_buffer::{BooleanBuffer, NullBuffer}; diff --git a/arrow-select/src/zip.rs b/arrow-select/src/zip.rs index ff2380ef2420..1f317dfd45c6 100644 --- a/arrow-select/src/zip.rs +++ b/arrow-select/src/zip.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Zip two arrays by some boolean mask. 
Where the mask evaluates `true` values of `truthy` + use crate::filter::SlicesIterator; use arrow_array::*; use arrow_data::transform::MutableArrayData; diff --git a/arrow-string/src/concat_elements.rs b/arrow-string/src/concat_elements.rs index cb60363d3324..41be8a81cb12 100644 --- a/arrow-string/src/concat_elements.rs +++ b/arrow-string/src/concat_elements.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +//! Provides utility functions for concatenation of elements in arrays. use std::sync::Arc; use arrow_array::builder::BufferBuilder; @@ -167,6 +168,11 @@ pub fn concat_elements_utf8_many( Ok(unsafe { builder.build_unchecked() }.into()) } +/// Returns the elementwise concatenation of [`Array`]s. +/// +/// # Errors +/// +/// This function errors if the arrays are of different types. pub fn concat_elements_dyn(left: &dyn Array, right: &dyn Array) -> Result { if left.data_type() != right.data_type() { return Err(ArrowError::ComputeError(format!( diff --git a/arrow-string/src/lib.rs b/arrow-string/src/lib.rs index 4444b37a7742..87e23a7b22ae 100644 --- a/arrow-string/src/lib.rs +++ b/arrow-string/src/lib.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#![warn(missing_docs)] //! Arrow string kernels pub mod concat_elements; diff --git a/arrow-string/src/like.rs b/arrow-string/src/like.rs index 4a6c5bab90e6..6d82682d724c 100644 --- a/arrow-string/src/like.rs +++ b/arrow-string/src/like.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +//! Provide SQL's LIKE operators for Arrow's string arrays + use crate::predicate::Predicate; use arrow_array::cast::AsArray; use arrow_array::*; diff --git a/arrow/tests/csv.rs b/arrow/tests/csv.rs index fd01f1663955..781b750f34cd 100644 --- a/arrow/tests/csv.rs +++ b/arrow/tests/csv.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use core::str; use std::sync::Arc; use arrow_array::*; @@ -54,6 +55,6 @@ fn test_export_csv_timestamps() { let left = "c1,c2 2019-04-18T20:54:47.378+10:00,2019-04-18T10:54:47.378 2021-10-30T17:59:07+11:00,2021-10-30T06:59:07\n"; - let right = String::from_utf8(sw).unwrap(); + let right = str::from_utf8(&sw).unwrap(); assert_eq!(left, right); } diff --git a/parquet_derive/src/lib.rs b/parquet_derive/src/lib.rs index 038d8fa446e5..a35cebb3a105 100644 --- a/parquet_derive/src/lib.rs +++ b/parquet_derive/src/lib.rs @@ -15,6 +15,10 @@ // specific language governing permissions and limitations // under the License. +//! This crate provides a procedural macro to derive +//! implementations of a RecordWriter and RecordReader + +#![warn(missing_docs)] #![recursion_limit = "128"] extern crate proc_macro;
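
For readers unfamiliar with the lint this patch enables across the crates, here is a minimal sketch (not part of the patch; the crate and item names below are made up) of what `#![warn(missing_docs)]` enforces, and why the diff adds `///` comments to public fields, methods, and enum variants rather than only to top-level items:

    // lib.rs of a hypothetical crate -- illustrates the crate-level lint
    // that this patch adds to arrow-array, arrow-schema, etc.
    #![warn(missing_docs)]

    //! Example crate-level documentation (the `//!` docs the lint expects at the root).

    /// A documented public struct: no lint warning is emitted for this item.
    pub struct Documented {
        /// Public fields need their own doc comments as well.
        pub value: i64,
    }

    /// Enum variants are checked individually, which is why the diff documents
    /// `UnionMode::Sparse` and `UnionMode::Dense` separately.
    pub enum Mode {
        /// A documented variant.
        Quiet,
        Loud, // warning: missing documentation for a variant
    }

    pub fn undocumented() {} // warning: missing documentation for a function

Because the attribute is `warn` rather than `deny`, existing builds keep compiling; the warnings simply surface every public item that still lacks documentation, which is how the doc comments added throughout this diff were identified.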