Skip to content

Commit

Permalink
Improve MetadataFetch and AsyncFileReader docs
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Oct 3, 2024
1 parent b9cf3c5 commit c8f648b
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
38 changes: 37 additions & 1 deletion parquet/src/arrow/async_reader/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,44 @@ use std::future::Future;
use std::ops::Range;

/// A data source that can be used with [`MetadataLoader`] to load [`ParquetMetaData`]
///
/// Note that an implementation is provided for [`AsyncFileReader`].
///
/// # Example `MetadataFetch` for a custom async data source
///
/// ```rust
/// # use parquet::errors::Result;
/// # use parquet::arrow::async_reader::MetadataFetch;
/// # use bytes::Bytes;
/// # use std::ops::Range;
/// # use std::io::SeekFrom;
/// # use futures::future::BoxFuture;
/// # use futures::FutureExt;
/// # use tokio::io::{AsyncReadExt, AsyncSeekExt};
/// // Adapter that implements the API for reading bytes from an async source (in
/// // this case a tokio::fs::File)
/// struct TokioFileMetadata {
///     file: tokio::fs::File,
/// }
/// impl MetadataFetch for TokioFileMetadata {
///     fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>> {
///         // return a future that fetches data in range
///         async move {
///             let mut buf = vec![0; range.end - range.start]; // target buffer
///             // seek to the start of the range and read the data
///             self.file.seek(SeekFrom::Start(range.start as u64)).await?;
///             self.file.read_exact(&mut buf).await?;
///             Ok(Bytes::from(buf)) // convert to Bytes
///         }
///         .boxed() // turn into BoxFuture, using FutureExt::boxed
///     }
/// }
/// ```
pub trait MetadataFetch {
/// Return a future that fetches the specified range of bytes asynchronously
///
/// Note the returned type is a boxed future, often created by
/// [`FutureExt::boxed`]. See the trait documentation for an example.
fn fetch(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>>;
}

Expand Down
12 changes: 12 additions & 0 deletions parquet/src/arrow/async_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,18 @@ use crate::arrow::schema::ParquetField;
pub use store::*;

/// The asynchronous interface used by [`ParquetRecordBatchStream`] to read parquet files
///
/// Notes:
///
/// 1. There is a default implementation for types that implement [`AsyncRead`]
/// and [`AsyncSeek`], for example [`tokio::fs::File`].
///
/// 2. [`ParquetObjectReader`], available when the `object_store` crate feature
/// is activated, implements this interface for [`ObjectStore`].
///
/// [`ObjectStore`]: object_store::ObjectStore
///
/// [`tokio::fs::File`]: https://docs.rs/tokio/latest/tokio/fs/struct.File.html
pub trait AsyncFileReader: Send {
/// Retrieve the bytes in `range`
fn get_bytes(&mut self, range: Range<usize>) -> BoxFuture<'_, Result<Bytes>>;
Expand Down
9 changes: 8 additions & 1 deletion parquet/src/file/metadata/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,18 @@ use crate::arrow::async_reader::MetadataFetch;
/// See [`crate::file::metadata::ParquetMetaDataWriter#output-format`] for a description of
/// the Parquet metadata.
///
/// Parquet metadata is not necessarily contiguous in the file: part is stored
/// in the footer (the last bytes of the file), but other portions (such as the
/// PageIndex) can be stored elsewhere.
///
/// This reader handles reading the footer as well as the non-contiguous parts
/// of the metadata such as the page indexes.
///
/// # Example
/// ```no_run
/// # use parquet::file::metadata::ParquetMetaDataReader;
/// # fn open_parquet_file(path: &str) -> std::fs::File { unimplemented!(); }
/// // read parquet metadata including page indexes
/// // read parquet metadata including page indexes from a file
/// let file = open_parquet_file("some_path.parquet");
/// let mut reader = ParquetMetaDataReader::new()
/// .with_page_indexes(true);
Expand Down

0 comments on commit c8f648b

Please sign in to comment.