-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add arrow_schema_utility (#301)
# Rationale for this change

There are situations where we infer Arrow schemas using the official `arrow` crate functions, which can produce Arrow schemas that are not fully PoSQL-compatible.

# What changes are included in this PR?

This patch adds a function that gives us PoSQL-compatible Arrow schemas: `arrow_schema_utility::get_posql_compatible_schema()`.

# Are these changes tested?

Yes.
- Loading branch information
Showing
2 changed files
with
40 additions
and
0 deletions.
There are no files selected for viewing
37 changes: 37 additions & 0 deletions
37
crates/proof-of-sql/src/base/database/arrow_schema_utility.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
//! This module provides utility functions for working with Arrow schemas in the context of Proof of SQL. | ||
//! It includes functionality to convert Arrow schemas to PoSQL-compatible formats. | ||
|
||
use alloc::sync::Arc; | ||
use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; | ||
|
||
/// Converts an Arrow schema to a PoSQL-compatible schema. | ||
/// | ||
/// This function takes an Arrow `SchemaRef` and returns a new `SchemaRef` where | ||
/// floating-point data types (Float16, Float32, Float64) are converted to Decimal256(75, 30). | ||
/// Other data types remain unchanged. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `schema` - The input Arrow schema to convert. | ||
/// | ||
/// # Returns | ||
/// | ||
/// A new `SchemaRef` with PoSQL-compatible data types. | ||
#[must_use] | ||
pub fn get_posql_compatible_schema(schema: &SchemaRef) -> SchemaRef { | ||
let new_fields: Vec<Field> = schema | ||
.fields() | ||
.iter() | ||
.map(|field| { | ||
let new_data_type = match field.data_type() { | ||
DataType::Float16 | DataType::Float32 | DataType::Float64 => { | ||
DataType::Decimal256(75, 30) | ||
} | ||
_ => field.data_type().clone(), | ||
}; | ||
Field::new(field.name(), new_data_type, field.is_nullable()) | ||
}) | ||
.collect(); | ||
|
||
Arc::new(Schema::new(new_fields)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters