diff --git a/file_format.rs b/file_format.rs index 1564a14a..e17f753c 100644 --- a/file_format.rs +++ b/file_format.rs @@ -5,7 +5,6 @@ use std::sync::Arc; use crate::reader::metadata::read_metadata_async; use arrow::datatypes::Schema; -use arrow::error::ArrowError; use datafusion::arrow::datatypes::SchemaRef; use datafusion::common::{FileType, Statistics}; use datafusion::datasource::file_format::FileFormat; @@ -18,20 +17,16 @@ use futures::TryStreamExt; use async_trait::async_trait; use futures_util::StreamExt; +use object_store::path::Path; use object_store::{ObjectMeta, ObjectStore}; use super::object_store_reader::ObjectStoreReader; use super::physical_exec::OrcExec; -async fn fetch_schema( - store: &Arc, - file: &ObjectMeta, -) -> Result<(object_store::path::Path, Schema)> { +async fn fetch_schema(store: &Arc, file: &ObjectMeta) -> Result<(Path, Schema)> { let loc_path = file.location.clone(); let mut reader = ObjectStoreReader::new(Arc::clone(store), file.clone()); - let metadata = read_metadata_async(&mut reader) - .await - .map_err(ArrowError::from)?; + let metadata = read_metadata_async(&mut reader).await?; let schema = metadata .root_data_type() .create_arrow_schema(&HashMap::default()); diff --git a/mod.rs b/mod.rs index 6ac47c72..7d008191 100644 --- a/mod.rs +++ b/mod.rs @@ -7,13 +7,15 @@ use datafusion::dataframe::DataFrame; use datafusion::datasource::listing::{ ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, }; -use datafusion::error::Result; +use datafusion::error::{DataFusionError, Result}; use datafusion::execution::config::SessionConfig; use datafusion::execution::context::{DataFilePaths, SessionContext, SessionState}; use datafusion::execution::options::ReadOptions; use async_trait::async_trait; +use crate::error::OrcError; + use self::file_format::OrcFormat; mod file_format; @@ -122,6 +124,12 @@ impl SessionContextOrcExt for SessionContext { } } +impl From for DataFusionError { + fn from(value: OrcError) -> Self { + DataFusionError::External(Box::new(value)) + } +} + #[cfg(test)] mod tests { use datafusion::assert_batches_sorted_eq; diff --git a/object_store_reader.rs b/object_store_reader.rs index a9c6988f..5d374dfd 100644 --- a/object_store_reader.rs +++ b/object_store_reader.rs @@ -7,6 +7,7 @@ use futures::{FutureExt, TryFutureExt}; use object_store::{ObjectMeta, ObjectStore}; +/// Implements [`AsyncChunkReader`] to allow reading ORC files via `object_store` API. pub struct ObjectStoreReader { store: Arc, file: ObjectMeta,