From e06f41e40ac8f436242cf0c0681b56556bf0118d Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Thu, 13 Jun 2024 15:02:52 -0700 Subject: [PATCH 01/24] feat: initial support for timestamp --- .../src/base/commitment/column_bounds.rs | 5 +++++ .../src/base/commitment/committable_column.rs | 12 ++++++++++++ .../database/arrow_array_to_column_conversion.rs | 12 ++++-------- crates/proof-of-sql/src/base/database/column.rs | 15 ++++++++++++++- .../base/database/owned_and_arrow_conversions.rs | 3 ++- .../src/base/database/owned_column.rs | 5 +++++ .../base/database/owned_table_test_accessor.rs | 1 + .../src/base/database/test_accessor_utility.rs | 14 ++++++++++++-- .../src/base/polynomial/multilinear_extension.rs | 4 ++++ crates/proof-of-sql/src/base/scalar/mod.rs | 1 + .../dory/dory_commitment_helper_gpu.rs | 1 + .../proof-of-sql/src/sql/ast/dense_filter_util.rs | 3 +++ .../src/sql/ast/filter_result_expr.rs | 1 + crates/proof-of-sql/src/sql/ast/group_by_util.rs | 2 ++ .../src/sql/proof/provable_query_result.rs | 7 +++++++ .../src/sql/proof/provable_result_column.rs | 2 ++ .../src/sql/proof/verifiable_query_result.rs | 3 ++- 17 files changed, 78 insertions(+), 13 deletions(-) diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 055878cce..6af96d820 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -207,6 +207,8 @@ pub enum ColumnBounds { BigInt(Bounds), /// The bounds of an Int128 column. Int128(Bounds), + /// The bounds of a Timestamp column. 
+ Timestamp(Bounds), } impl ColumnBounds { @@ -219,6 +221,9 @@ impl ColumnBounds { CommittableColumn::Int(ints) => ColumnBounds::Int(Bounds::from_iter(*ints)), CommittableColumn::BigInt(ints) => ColumnBounds::BigInt(Bounds::from_iter(*ints)), CommittableColumn::Int128(ints) => ColumnBounds::Int128(Bounds::from_iter(*ints)), + CommittableColumn::Timestamp(times) => { + ColumnBounds::Timestamp(Bounds::from_iter(*times)) + } CommittableColumn::Boolean(_) | CommittableColumn::Decimal75(_, _, _) | CommittableColumn::Scalar(_) diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index bf83bcfd7..a3b33452a 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -37,6 +37,8 @@ pub enum CommittableColumn<'a> { Scalar(Vec<[u64; 4]>), /// Column of limbs for committing to scalars, hashed from a VarChar column. VarChar(Vec<[u64; 4]>), + /// Borrowed Timestamp column, mapped to `u64`. 
+ Timestamp(&'a [u64]), } impl<'a> CommittableColumn<'a> { @@ -51,6 +53,7 @@ impl<'a> CommittableColumn<'a> { CommittableColumn::Scalar(col) => col.len(), CommittableColumn::VarChar(col) => col.len(), CommittableColumn::Boolean(col) => col.len(), + CommittableColumn::Timestamp(col) => col.len(), } } @@ -78,6 +81,7 @@ impl<'a> From<&CommittableColumn<'a>> for ColumnType { CommittableColumn::Scalar(_) => ColumnType::Scalar, CommittableColumn::VarChar(_) => ColumnType::VarChar, CommittableColumn::Boolean(_) => ColumnType::Boolean, + CommittableColumn::Timestamp(_) => ColumnType::Timestamp, } } } @@ -99,6 +103,7 @@ impl<'a, S: Scalar> From<&Column<'a, S>> for CommittableColumn<'a> { let as_limbs: Vec<_> = scalars.iter().map(RefInto::<[u64; 4]>::ref_into).collect(); CommittableColumn::VarChar(as_limbs) } + Column::Timestamp(times) => CommittableColumn::Timestamp(times), } } } @@ -128,6 +133,7 @@ impl<'a, S: Scalar> From<&'a OwnedColumn> for CommittableColumn<'a> { .map(Into::<[u64; 4]>::into) .collect(), ), + OwnedColumn::Timestamp(times) => (times as &[_]).into(), } } } @@ -162,6 +168,11 @@ impl<'a> From<&'a [bool]> for CommittableColumn<'a> { CommittableColumn::Boolean(value) } } +impl<'a> From<&'a [u64]> for CommittableColumn<'a> { + fn from(value: &'a [u64]) -> Self { + CommittableColumn::Timestamp(value) + } +} #[cfg(feature = "blitzar")] impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { @@ -175,6 +186,7 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { CommittableColumn::Scalar(limbs) => Sequence::from(limbs), CommittableColumn::VarChar(limbs) => Sequence::from(limbs), CommittableColumn::Boolean(bools) => Sequence::from(*bools), + CommittableColumn::Timestamp(times) => Sequence::from(*times), } } } diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index 2c339d8fe..391ca5c4e 100644 --- 
a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,10 +1,6 @@ use super::scalar_and_i256_conversions::convert_i256_to_scalar; use crate::{ - base::{ - database::Column, - math::decimal::Precision, - scalar::{Curve25519Scalar, Scalar}, - }, + base::{database::Column, math::decimal::Precision, scalar::Scalar}, sql::parse::ConversionError, }; use arrow::{ @@ -48,7 +44,7 @@ pub trait ArrayRefExt { #[cfg(feature = "blitzar")] fn to_curve25519_scalars( &self, - ) -> Result, ArrowArrayToColumnConversionError>; + ) -> Result, ArrowArrayToColumnConversionError>; /// Convert an ArrayRef into a Proof of SQL Column type /// @@ -76,7 +72,7 @@ impl ArrayRefExt for ArrayRef { #[cfg(feature = "blitzar")] fn to_curve25519_scalars( &self, - ) -> Result, ArrowArrayToColumnConversionError> { + ) -> Result, ArrowArrayToColumnConversionError> { if self.null_count() != 0 { return Err(ArrowArrayToColumnConversionError::ArrayContainsNulls); } @@ -283,7 +279,7 @@ impl ArrayRefExt for ArrayRef { mod tests { use super::*; - use crate::proof_primitive::dory::DoryScalar; + use crate::{base::scalar::Curve25519Scalar, proof_primitive::dory::DoryScalar}; use arrow::array::Decimal256Builder; use std::{str::FromStr, sync::Arc}; diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index b78ec8873..a3fb6732b 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -3,7 +3,7 @@ use crate::base::{ math::decimal::{scale_scalar, Precision}, scalar::Scalar, }; -use arrow::datatypes::{DataType, Field}; +use arrow::datatypes::{DataType, Field, TimeUnit}; use bumpalo::Bump; use proof_of_sql_parser::Identifier; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; @@ -37,6 +37,8 @@ pub enum Column<'a, S: Scalar> { /// - the first element maps to the str values. 
/// - the second element maps to the str hashes (see [crate::base::scalar::Scalar]). VarChar((&'a [&'a str], &'a [S])), + /// Timestamp columns + Timestamp(&'a [u64]), } impl<'a, S: Scalar> Column<'a, S> { /// Provides the column type associated with the column @@ -50,6 +52,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(_) => ColumnType::Int128, Self::Scalar(_) => ColumnType::Scalar, Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale), + Self::Timestamp(_) => ColumnType::Timestamp, } } /// Returns the length of the column. @@ -66,6 +69,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(col) => col.len(), Self::Scalar(col) => col.len(), Self::Decimal75(_, _, col) => col.len(), + Self::Timestamp(col) => col.len(), } } /// Returns `true` if the column has no elements. @@ -153,6 +157,10 @@ impl<'a, S: Scalar> Column<'a, S> { .par_iter() .map(|s| *s * scale_factor) .collect::>(), + Self::Timestamp(col) => col + .par_iter() + .map(|i| S::from(i) * scale_factor) + .collect::>(), } } } @@ -194,6 +202,9 @@ pub enum ColumnType { /// Mapped to i256 #[serde(rename = "Decimal75", alias = "DECIMAL75", alias = "decimal75")] Decimal75(Precision, i8), + /// Mapped to u64 + #[serde(alias = "TIMESTAMP", alias = "timestamp")] + Timestamp, } impl ColumnType { @@ -256,6 +267,7 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), + ColumnType::Timestamp => DataType::Time64(TimeUnit::Second), } } } @@ -298,6 +310,7 @@ impl std::fmt::Display for ColumnType { } ColumnType::VarChar => write!(f, "VARCHAR"), ColumnType::Scalar => write!(f, "SCALAR"), + ColumnType::Timestamp => write!(f, "TIMESTAMP"), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index e9f522a2b..07724c3fb 100644 --- 
a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -24,7 +24,7 @@ use crate::base::{ use arrow::{ array::{ ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, - Int64Array, StringArray, + Int64Array, StringArray, UInt64Array, }, datatypes::{i256, DataType, Schema, SchemaRef}, error::ArrowError, @@ -79,6 +79,7 @@ impl From> for ArrayRef { } OwnedColumn::Scalar(_) => unimplemented!("Cannot convert Scalar type to arrow type"), OwnedColumn::VarChar(col) => Arc::new(StringArray::from(col)), + OwnedColumn::Timestamp(col) => Arc::new(UInt64Array::from(col)), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index 03420916e..631e16866 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -24,6 +24,8 @@ pub enum OwnedColumn { Decimal75(Precision, i8, Vec), /// Scalar columns Scalar(Vec), + /// i32 columns + Timestamp(Vec), } impl OwnedColumn { @@ -38,6 +40,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.len(), OwnedColumn::Decimal75(_, _, col) => col.len(), OwnedColumn::Scalar(col) => col.len(), + OwnedColumn::Timestamp(col) => col.len(), } } /// Returns true if the column is empty. @@ -51,6 +54,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.is_empty(), OwnedColumn::Scalar(col) => col.is_empty(), OwnedColumn::Decimal75(_, _, col) => col.is_empty(), + OwnedColumn::Timestamp(col) => col.is_empty(), } } /// Returns the type of the column. 
@@ -66,6 +70,7 @@ impl OwnedColumn { OwnedColumn::Decimal75(precision, scale, _) => { ColumnType::Decimal75(*precision, *scale) } + OwnedColumn::Timestamp(_) => ColumnType::Timestamp, } } } diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs index dd790aa25..b9d480675 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs @@ -95,6 +95,7 @@ impl DataAccessor for OwnedTableTestA .alloc_slice_fill_iter(col.iter().map(|s| (*s).into())); Column::VarChar((col, scals)) } + OwnedColumn::Timestamp(col) => Column::Timestamp(col), } } } diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index b61e3db80..551679f8c 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -2,9 +2,9 @@ use crate::base::database::ColumnType; use arrow::{ array::{ Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, - StringArray, + StringArray, UInt64Array, }, - datatypes::{i256, DataType, Field, Schema}, + datatypes::{i256, DataType, Field, Schema, TimeUnit}, record_batch::RecordBatch, }; use rand::{ @@ -115,6 +115,16 @@ pub fn make_random_test_accessor_data( columns.push(Arc::new(StringArray::from(col))); } ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), + ColumnType::Timestamp => { + column_fields.push(Field::new( + *col_name, + DataType::Time64(TimeUnit::Second), + false, + )); + + let values: Vec = values.iter().map(|x| *x as u64).collect(); + columns.push(Arc::new(UInt64Array::from(values.to_vec()))); + } } } diff --git a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs 
index 4487ea828..cc52e694d 100644 --- a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs +++ b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs @@ -102,6 +102,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.inner_product(evaluation_vec), Column::Int128(c) => c.inner_product(evaluation_vec), Column::Decimal75(_, _, c) => c.inner_product(evaluation_vec), + Column::Timestamp(c) => c.inner_product(evaluation_vec), } } @@ -115,6 +116,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.mul_add(res, multiplier), Column::Int128(c) => c.mul_add(res, multiplier), Column::Decimal75(_, _, c) => c.mul_add(res, multiplier), + Column::Timestamp(c) => c.mul_add(res, multiplier), } } @@ -128,6 +130,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.to_sumcheck_term(num_vars), Column::Int128(c) => c.to_sumcheck_term(num_vars), Column::Decimal75(_, _, c) => c.to_sumcheck_term(num_vars), + Column::Timestamp(c) => c.to_sumcheck_term(num_vars), } } @@ -141,6 +144,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => MultilinearExtension::::id(c), Column::Int128(c) => MultilinearExtension::::id(c), Column::Decimal75(_, _, c) => MultilinearExtension::::id(c), + Column::Timestamp(c) => MultilinearExtension::::id(c), } } } diff --git a/crates/proof-of-sql/src/base/scalar/mod.rs b/crates/proof-of-sql/src/base/scalar/mod.rs index 9ff4f56b2..074b0c16b 100644 --- a/crates/proof-of-sql/src/base/scalar/mod.rs +++ b/crates/proof-of-sql/src/base/scalar/mod.rs @@ -43,6 +43,7 @@ pub trait Scalar: + for<'a> std::convert::From<&'a i16> // Required for `Column` to implement `MultilinearExtension` + for<'a> std::convert::From<&'a i32> // Required for `Column` to implement `MultilinearExtension` + for<'a> std::convert::From<&'a i64> // Required for `Column` to implement `MultilinearExtension` + + for<'a> std::convert::From<&'a u64> // Required for `Column` to 
implement `MultilinearExtension` + for<'a> std::convert::From<&'a i128> // Required for `Column` to implement `MultilinearExtension` + std::convert::TryInto + std::convert::TryInto diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs index 58d9b354c..7402e06ae 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs @@ -280,6 +280,7 @@ fn compute_dory_commitment( CommittableColumn::Scalar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), + CommittableColumn::Timestamp(column) => compute_dory_commitment_impl(column, offset, setup), } } diff --git a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs index c45f1e911..c3a62c153 100644 --- a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs +++ b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs @@ -65,6 +65,9 @@ pub fn filter_column_by_index<'a, S: Scalar>( *scale, alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), ), + Column::Timestamp(col) => { + Column::Timestamp(alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i]))) + } } } diff --git a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs index 76548f167..7c863ad6a 100644 --- a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs +++ b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs @@ -75,6 +75,7 @@ impl FilterResultExpr { Column::Scalar(_col) => todo!(), Column::Decimal75(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), Column::VarChar((_, scals)) => prover_evaluate_impl(builder, alloc, 
selection, scals), + Column::Timestamp(col) => prover_evaluate_impl(builder, alloc, selection, col), }; } diff --git a/crates/proof-of-sql/src/sql/ast/group_by_util.rs b/crates/proof-of-sql/src/sql/ast/group_by_util.rs index b1bcfb710..f79544e0e 100644 --- a/crates/proof-of-sql/src/sql/ast/group_by_util.rs +++ b/crates/proof-of-sql/src/sql/ast/group_by_util.rs @@ -114,6 +114,7 @@ pub(super) fn sum_aggregate_column_by_index_counts<'a, S: Scalar>( } Column::Scalar(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), Column::VarChar(_) => unimplemented!("Cannot sum varchar columns"), + Column::Timestamp(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), } } @@ -175,6 +176,7 @@ pub(super) fn compare_indexes_by_columns( Column::Decimal75(_, _, _) => todo!("TODO: unimplemented"), Column::Scalar(col) => col[i].cmp(&col[j]), Column::VarChar((col, _)) => col[i].cmp(col[j]), + Column::Timestamp(col) => col[i].cmp(&col[j]), }) .find(|&ord| ord != Ordering::Equal) .unwrap_or(Ordering::Equal) diff --git a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs index bf983cfc3..401c19fcf 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs @@ -115,6 +115,7 @@ impl ProvableQueryResult { ColumnType::Scalar => decode_and_convert::(&self.data[offset..]), ColumnType::VarChar => decode_and_convert::<&str, S>(&self.data[offset..]), + ColumnType::Timestamp => decode_and_convert::(&self.data[offset..]), }?; val += evaluation_vec[index as usize] * x; @@ -194,6 +195,12 @@ impl ProvableQueryResult { offset += num_read; Ok((field.name(), OwnedColumn::Decimal75(precision, scale, col))) } + ColumnType::Timestamp => { + let (col, num_read) = decode_multiple_elements(&self.data[offset..], n) + .ok_or(QueryError::Overflow)?; + offset += num_read; + Ok((field.name(), OwnedColumn::Int(col))) + } }) 
.collect::>()?, )?; diff --git a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs index 3ca3df3b6..fc4d319a8 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs @@ -42,6 +42,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.num_bytes(selection), Column::Scalar(col) => col.num_bytes(selection), Column::VarChar((col, _)) => col.num_bytes(selection), + Column::Timestamp(col) => col.num_bytes(selection), } } @@ -55,6 +56,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.write(out, selection), Column::Scalar(col) => col.write(out, selection), Column::VarChar((col, _)) => col.write(out, selection), + Column::Timestamp(col) => col.write(out, selection), } } } diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs index 84b95b091..31ded2e8e 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs @@ -155,7 +155,7 @@ fn make_empty_query_result(result_fields: Vec) -> QueryR match field.data_type() { ColumnType::Boolean => OwnedColumn::Boolean(vec![]), ColumnType::SmallInt => OwnedColumn::SmallInt(vec![]), - ColumnType::Int => OwnedColumn::SmallInt(vec![]), + ColumnType::Int => OwnedColumn::Int(vec![]), ColumnType::BigInt => OwnedColumn::BigInt(vec![]), ColumnType::Int128 => OwnedColumn::Int128(vec![]), ColumnType::Decimal75(precision, scale) => { @@ -163,6 +163,7 @@ fn make_empty_query_result(result_fields: Vec) -> QueryR } ColumnType::Scalar => OwnedColumn::Scalar(vec![]), ColumnType::VarChar => OwnedColumn::VarChar(vec![]), + ColumnType::Timestamp => OwnedColumn::Timestamp(vec![]), }, ) }) From 24ef423c66dcc946cc3878ee7d216ba1a6e08cf5 Mon Sep 17 00:00:00 2001 From: Dustin Ray 
Date: Thu, 13 Jun 2024 15:40:46 -0700 Subject: [PATCH 02/24] fix: cover dory commitment helper gpu matcharm --- .../src/proof_primitive/dory/dory_commitment_helper_cpu.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs index 77c4c2a18..5254661e9 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs @@ -60,6 +60,7 @@ fn compute_dory_commitment( } CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), + CommittableColumn::Timestamp(column) => compute_dory_commitment_impl(column, offset, setup), } } From 80f585c760d0ee1a7feef172e101beeaf120479c Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 20:51:40 -0700 Subject: [PATCH 03/24] feat: init timestamp support --- Cargo.toml | 1 + crates/proof-of-sql/Cargo.toml | 1 + .../src/base/commitment/column_bounds.rs | 4 +- .../src/base/commitment/committable_column.rs | 18 ++--- .../proof-of-sql/src/base/database/column.rs | 53 +++++++++--- .../database/owned_and_arrow_conversions.rs | 11 ++- .../src/base/database/owned_column.rs | 16 ++-- .../database/owned_table_test_accessor.rs | 2 +- .../base/database/test_accessor_utility.rs | 27 +++++-- crates/proof-of-sql/src/base/mod.rs | 1 + .../base/polynomial/multilinear_extension.rs | 8 +- crates/proof-of-sql/src/base/time/mod.rs | 1 + .../proof-of-sql/src/base/time/timestamp.rs | 81 +++++++++++++++++++ .../dory/dory_commitment_helper_cpu.rs | 2 +- .../dory/dory_commitment_helper_gpu.rs | 4 +- .../src/sql/ast/dense_filter_util.rs | 8 +- .../src/sql/ast/filter_result_expr.rs | 2 +- .../proof-of-sql/src/sql/ast/group_by_util.rs | 6 +- .../src/sql/proof/provable_query_result.rs | 8 
+- .../src/sql/proof/provable_result_column.rs | 4 +- .../src/sql/proof/verifiable_query_result.rs | 2 +- 21 files changed, 203 insertions(+), 57 deletions(-) create mode 100644 crates/proof-of-sql/src/base/time/mod.rs create mode 100644 crates/proof-of-sql/src/base/time/timestamp.rs diff --git a/Cargo.toml b/Cargo.toml index 0adc9593c..38bd450b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,7 @@ bytemuck = {version = "1.14.2" } byte-slice-cast = { version = "1.2.1" } clap = { version = "4.5.4" } criterion = { version = "0.5.1" } +chrono-tz = {version = "0.9.0", features = ["serde"]} curve25519-dalek = { version = "4", features = ["rand_core"] } derive_more = { version = "0.99" } dyn_partial_eq = { version = "0.1.2" } diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index 6535d7f2d..d6a0736e9 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -30,6 +30,7 @@ bumpalo = { workspace = true, features = ["collections"] } bytemuck = { workspace = true } byte-slice-cast = { workspace = true } curve25519-dalek = { workspace = true, features = ["serde"] } +chrono-tz = {workspace = true, features = ["serde"]} derive_more = { workspace = true } dyn_partial_eq = { workspace = true } hashbrown = { workspace = true } diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 6af96d820..672b280fd 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -208,7 +208,7 @@ pub enum ColumnBounds { /// The bounds of an Int128 column. Int128(Bounds), /// The bounds of a Timestamp column. 
- Timestamp(Bounds), + Timestamp(Bounds), } impl ColumnBounds { @@ -221,7 +221,7 @@ impl ColumnBounds { CommittableColumn::Int(ints) => ColumnBounds::Int(Bounds::from_iter(*ints)), CommittableColumn::BigInt(ints) => ColumnBounds::BigInt(Bounds::from_iter(*ints)), CommittableColumn::Int128(ints) => ColumnBounds::Int128(Bounds::from_iter(*ints)), - CommittableColumn::Timestamp(times) => { + CommittableColumn::Timestamp(_, _, times) => { ColumnBounds::Timestamp(Bounds::from_iter(*times)) } CommittableColumn::Boolean(_) diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index a3b33452a..71e0fa35d 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -3,6 +3,7 @@ use crate::base::{ math::decimal::Precision, ref_into::RefInto, scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, }; #[cfg(feature = "blitzar")] use blitzar::sequence::Sequence; @@ -38,7 +39,7 @@ pub enum CommittableColumn<'a> { /// Column of limbs for committing to scalars, hashed from a VarChar column. VarChar(Vec<[u64; 4]>), /// Borrowed Timestamp column, mapped to `u64`. 
- Timestamp(&'a [u64]), + Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } impl<'a> CommittableColumn<'a> { @@ -53,7 +54,7 @@ impl<'a> CommittableColumn<'a> { CommittableColumn::Scalar(col) => col.len(), CommittableColumn::VarChar(col) => col.len(), CommittableColumn::Boolean(col) => col.len(), - CommittableColumn::Timestamp(col) => col.len(), + CommittableColumn::Timestamp(_, _, col) => col.len(), } } @@ -81,7 +82,7 @@ impl<'a> From<&CommittableColumn<'a>> for ColumnType { CommittableColumn::Scalar(_) => ColumnType::Scalar, CommittableColumn::VarChar(_) => ColumnType::VarChar, CommittableColumn::Boolean(_) => ColumnType::Boolean, - CommittableColumn::Timestamp(_) => ColumnType::Timestamp, + CommittableColumn::Timestamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), } } } @@ -103,7 +104,7 @@ impl<'a, S: Scalar> From<&Column<'a, S>> for CommittableColumn<'a> { let as_limbs: Vec<_> = scalars.iter().map(RefInto::<[u64; 4]>::ref_into).collect(); CommittableColumn::VarChar(as_limbs) } - Column::Timestamp(times) => CommittableColumn::Timestamp(times), + Column::Timestamp(tu, tz, times) => CommittableColumn::Timestamp(*tu, *tz, times), } } } @@ -133,7 +134,7 @@ impl<'a, S: Scalar> From<&'a OwnedColumn> for CommittableColumn<'a> { .map(Into::<[u64; 4]>::into) .collect(), ), - OwnedColumn::Timestamp(times) => (times as &[_]).into(), + OwnedColumn::Timestamp(_, _, times) => (times as &[_]).into(), } } } @@ -168,11 +169,6 @@ impl<'a> From<&'a [bool]> for CommittableColumn<'a> { CommittableColumn::Boolean(value) } } -impl<'a> From<&'a [u64]> for CommittableColumn<'a> { - fn from(value: &'a [u64]) -> Self { - CommittableColumn::Timestamp(value) - } -} #[cfg(feature = "blitzar")] impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { @@ -186,7 +182,7 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { CommittableColumn::Scalar(limbs) => Sequence::from(limbs), CommittableColumn::VarChar(limbs) => Sequence::from(limbs), 
CommittableColumn::Boolean(bools) => Sequence::from(*bools), - CommittableColumn::Timestamp(times) => Sequence::from(*times), + CommittableColumn::Timestamp(_, _, times) => Sequence::from(*times), } } } diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index a3fb6732b..f41aeb013 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -2,12 +2,14 @@ use super::{LiteralValue, TableRef}; use crate::base::{ math::decimal::{scale_scalar, Precision}, scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, }; -use arrow::datatypes::{DataType, Field, TimeUnit}; +use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; use bumpalo::Bump; use proof_of_sql_parser::Identifier; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; +use std::{str::FromStr, sync::Arc}; /// Represents a read-only view of a column in an in-memory, /// column-oriented database. @@ -38,8 +40,12 @@ pub enum Column<'a, S: Scalar> { /// - the second element maps to the str hashes (see [crate::base::scalar::Scalar]). 
VarChar((&'a [&'a str], &'a [S])), /// Timestamp columns - Timestamp(&'a [u64]), + /// - the first element maps to the stored [`TimeUnit`] + /// - the second element maps to an optional timezone as a string + /// - the third element maps to columns of timeunits since unix epoch + Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } + impl<'a, S: Scalar> Column<'a, S> { /// Provides the column type associated with the column pub fn column_type(&self) -> ColumnType { @@ -52,7 +58,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(_) => ColumnType::Int128, Self::Scalar(_) => ColumnType::Scalar, Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale), - Self::Timestamp(_) => ColumnType::Timestamp, + Self::Timestamp(time_unit, timezone, _) => ColumnType::Timestamp(*time_unit, *timezone), } } /// Returns the length of the column. @@ -69,7 +75,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(col) => col.len(), Self::Scalar(col) => col.len(), Self::Decimal75(_, _, col) => col.len(), - Self::Timestamp(col) => col.len(), + Self::Timestamp(_, _, col) => col.len(), } } /// Returns `true` if the column has no elements. @@ -157,7 +163,7 @@ impl<'a, S: Scalar> Column<'a, S> { .par_iter() .map(|s| *s * scale_factor) .collect::>(), - Self::Timestamp(col) => col + Self::Timestamp(_, _, col) => col .par_iter() .map(|i| S::from(i) * scale_factor) .collect::>(), @@ -202,9 +208,9 @@ pub enum ColumnType { /// Mapped to i256 #[serde(rename = "Decimal75", alias = "DECIMAL75", alias = "decimal75")] Decimal75(Precision, i8), - /// Mapped to u64 + /// Mapped to i64 #[serde(alias = "TIMESTAMP", alias = "timestamp")] - Timestamp, + Timestamp(ProofsTimeUnit, ProofsTimeZone), } impl ColumnType { @@ -217,7 +223,7 @@ impl ColumnType { | ColumnType::BigInt | ColumnType::Int128 | ColumnType::Scalar - | ColumnType::Decimal75(_, _) + | ColumnType::Decimal75(_, _) // TODO: is a timestamp numeric? 
) } @@ -225,7 +231,7 @@ impl ColumnType { pub fn is_integer(&self) -> bool { matches!( self, - ColumnType::SmallInt | ColumnType::Int | ColumnType::BigInt | ColumnType::Int128 + ColumnType::SmallInt | ColumnType::Int | ColumnType::BigInt | ColumnType::Int128 // TODO: is a timestamp an integer? ) } @@ -235,6 +241,7 @@ impl ColumnType { Self::SmallInt => Some(5_u8), Self::Int => Some(10_u8), Self::BigInt => Some(19_u8), + Self::Timestamp(_, _) => Some(19_u8), Self::Int128 => Some(39_u8), Self::Decimal75(precision, _) => Some(precision.value()), // Scalars are not in database & are only used for typeless comparisons for testing so we return 0 @@ -267,7 +274,10 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::Timestamp => DataType::Time64(TimeUnit::Second), + ColumnType::Timestamp(timeunit, timezone) => DataType::Timestamp( + ArrowTimeUnit::from(*timeunit), + Some(Arc::from(timezone)), + ), } } } @@ -286,6 +296,23 @@ impl TryFrom for ColumnType { DataType::Decimal256(precision, scale) if precision <= 75 => { Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) } + DataType::Timestamp(time_unit, timezone_option) => { + let custom_time_unit = ProofsTimeUnit::from(time_unit); + + let timezone = match timezone_option { + Some(tz_arc) => { + let tz_str = &*tz_arc; // Deref Arc to &str + chrono_tz::Tz::from_str(tz_str) + .map_err(|_| format!("Invalid timezone string: {}", tz_str))? 
+ } + None => chrono_tz::Tz::UTC, // Default to UTC if None + }; + + Ok(ColumnType::Timestamp( + custom_time_unit, + ProofsTimeZone(timezone), + )) + } DataType::Utf8 => Ok(ColumnType::VarChar), _ => Err(format!("Unsupported arrow data type {:?}", data_type)), } @@ -310,7 +337,11 @@ impl std::fmt::Display for ColumnType { } ColumnType::VarChar => write!(f, "VARCHAR"), ColumnType::Scalar => write!(f, "SCALAR"), - ColumnType::Timestamp => write!(f, "TIMESTAMP"), + ColumnType::Timestamp(timeunit, timezone) => write!( + f, + "TIMESTAMP(TIMEUNIT: {:?}, TIMEZONE: {timeunit})", + timezone + ), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index 07724c3fb..4465671a3 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -20,11 +20,13 @@ use crate::base::{ }, math::decimal::Precision, scalar::Scalar, + time::timestamp::ProofsTimeUnit, }; use arrow::{ array::{ ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, - Int64Array, StringArray, UInt64Array, + Int64Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, }, datatypes::{i256, DataType, Schema, SchemaRef}, error::ArrowError, @@ -79,7 +81,12 @@ impl From> for ArrayRef { } OwnedColumn::Scalar(_) => unimplemented!("Cannot convert Scalar type to arrow type"), OwnedColumn::VarChar(col) => Arc::new(StringArray::from(col)), - OwnedColumn::Timestamp(col) => Arc::new(UInt64Array::from(col)), + OwnedColumn::Timestamp(time_unit, _, col) => match time_unit { + ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(col)), + ProofsTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(col)), + ProofsTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)), + ProofsTimeUnit::Nanosecond => 
Arc::new(TimestampNanosecondArray::from(col)), + }, } } } diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index 631e16866..1b9190860 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -3,7 +3,11 @@ /// converting to the final result in either Arrow format or JSON. /// This is the analog of an arrow Array. use super::ColumnType; -use crate::base::{math::decimal::Precision, scalar::Scalar}; +use crate::base::{ + math::decimal::Precision, + scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, +}; #[derive(Debug, PartialEq, Clone, Eq)] #[non_exhaustive] /// Supported types for OwnedColumn @@ -24,8 +28,8 @@ pub enum OwnedColumn { Decimal75(Precision, i8, Vec), /// Scalar columns Scalar(Vec), - /// i32 columns - Timestamp(Vec), + /// Timestamp columns + Timestamp(ProofsTimeUnit, ProofsTimeZone, Vec), } impl OwnedColumn { @@ -40,7 +44,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.len(), OwnedColumn::Decimal75(_, _, col) => col.len(), OwnedColumn::Scalar(col) => col.len(), - OwnedColumn::Timestamp(col) => col.len(), + OwnedColumn::Timestamp(_, _, col) => col.len(), } } /// Returns true if the column is empty. @@ -54,7 +58,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.is_empty(), OwnedColumn::Scalar(col) => col.is_empty(), OwnedColumn::Decimal75(_, _, col) => col.is_empty(), - OwnedColumn::Timestamp(col) => col.is_empty(), + OwnedColumn::Timestamp(_, _, col) => col.is_empty(), } } /// Returns the type of the column. 
@@ -70,7 +74,7 @@ impl OwnedColumn { OwnedColumn::Decimal75(precision, scale, _) => { ColumnType::Decimal75(*precision, *scale) } - OwnedColumn::Timestamp(_) => ColumnType::Timestamp, + OwnedColumn::Timestamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs index b9d480675..4d669aabe 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs @@ -95,7 +95,7 @@ impl DataAccessor for OwnedTableTestA .alloc_slice_fill_iter(col.iter().map(|s| (*s).into())); Column::VarChar((col, scals)) } - OwnedColumn::Timestamp(col) => Column::Timestamp(col), + OwnedColumn::Timestamp(tu, tz, col) => Column::Timestamp(*tu, *tz, col), } } } diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index 551679f8c..d912d756f 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -1,8 +1,9 @@ -use crate::base::database::ColumnType; +use crate::base::{database::ColumnType, time::timestamp::ProofsTimeUnit}; use arrow::{ array::{ Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, - StringArray, UInt64Array, + StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, }, datatypes::{i256, DataType, Field, Schema, TimeUnit}, record_batch::RecordBatch, @@ -115,15 +116,29 @@ pub fn make_random_test_accessor_data( columns.push(Arc::new(StringArray::from(col))); } ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), - ColumnType::Timestamp => { + ColumnType::Timestamp(tu, tz) => { column_fields.push(Field::new( *col_name, - 
DataType::Time64(TimeUnit::Second), + DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.0.name()))), false, )); - let values: Vec = values.iter().map(|x| *x as u64).collect(); - columns.push(Arc::new(UInt64Array::from(values.to_vec()))); + // Create the correct timestamp array based on the time unit + let timestamp_array: Arc = match tu { + ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from( + values.to_vec(), + )), + ProofsTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from( + values.to_vec(), + )), + ProofsTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from( + values.to_vec(), + )), + ProofsTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from( + values.to_vec(), + )), + }; + columns.push(timestamp_array); } } } diff --git a/crates/proof-of-sql/src/base/mod.rs b/crates/proof-of-sql/src/base/mod.rs index 55eb53b1a..893c22b06 100644 --- a/crates/proof-of-sql/src/base/mod.rs +++ b/crates/proof-of-sql/src/base/mod.rs @@ -11,3 +11,4 @@ pub mod scalar; mod serialize; pub(crate) use serialize::{impl_serde_for_ark_serde_checked, impl_serde_for_ark_serde_unchecked}; pub(crate) mod slice_ops; +pub(crate) mod time; diff --git a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs index cc52e694d..696d81d31 100644 --- a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs +++ b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs @@ -102,7 +102,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.inner_product(evaluation_vec), Column::Int128(c) => c.inner_product(evaluation_vec), Column::Decimal75(_, _, c) => c.inner_product(evaluation_vec), - Column::Timestamp(c) => c.inner_product(evaluation_vec), + Column::Timestamp(_, _, c) => c.inner_product(evaluation_vec), } } @@ -116,7 +116,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.mul_add(res, multiplier), 
Column::Int128(c) => c.mul_add(res, multiplier), Column::Decimal75(_, _, c) => c.mul_add(res, multiplier), - Column::Timestamp(c) => c.mul_add(res, multiplier), + Column::Timestamp(_, _, c) => c.mul_add(res, multiplier), } } @@ -130,7 +130,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.to_sumcheck_term(num_vars), Column::Int128(c) => c.to_sumcheck_term(num_vars), Column::Decimal75(_, _, c) => c.to_sumcheck_term(num_vars), - Column::Timestamp(c) => c.to_sumcheck_term(num_vars), + Column::Timestamp(_, _, c) => c.to_sumcheck_term(num_vars), } } @@ -144,7 +144,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => MultilinearExtension::::id(c), Column::Int128(c) => MultilinearExtension::::id(c), Column::Decimal75(_, _, c) => MultilinearExtension::::id(c), - Column::Timestamp(c) => MultilinearExtension::::id(c), + Column::Timestamp(_, _, c) => MultilinearExtension::::id(c), } } } diff --git a/crates/proof-of-sql/src/base/time/mod.rs b/crates/proof-of-sql/src/base/time/mod.rs new file mode 100644 index 000000000..9bef4a59b --- /dev/null +++ b/crates/proof-of-sql/src/base/time/mod.rs @@ -0,0 +1 @@ +pub(crate) mod timestamp; diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs new file mode 100644 index 000000000..862f4927b --- /dev/null +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -0,0 +1,81 @@ +use arrow::datatypes::TimeUnit as ArrowTimeUnit; +use chrono_tz::Tz; +use core::fmt; +use serde::{Deserialize, Serialize}; +use std::{str::FromStr, sync::Arc}; // Tz implements the TimeZone trait and provides access to IANA time zones + +#[derive(Debug, Clone, Deserialize, Serialize, Hash)] +pub struct Timestamp { + time: i64, + timeunit: ProofsTimeUnit, + timezone: Tz, +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize, Serialize, Hash)] +pub enum ProofsTimeUnit { + Second, + Millisecond, + Microsecond, + Nanosecond, +} + +impl fmt::Display for 
ProofsTimeUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ProofsTimeUnit::Second => write!(f, "Second"), + ProofsTimeUnit::Millisecond => write!(f, "Millisecond"), + ProofsTimeUnit::Microsecond => write!(f, "Microsecond"), + ProofsTimeUnit::Nanosecond => write!(f, "Nanosecond"), + } + } +} + +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize)] +pub struct ProofsTimeZone(pub Tz); + +impl fmt::Display for ProofsTimeZone { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl From for ArrowTimeUnit { + fn from(unit: ProofsTimeUnit) -> Self { + match unit { + ProofsTimeUnit::Second => ArrowTimeUnit::Second, + ProofsTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + ProofsTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + ProofsTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + } + } +} + +impl From for ProofsTimeUnit { + fn from(unit: ArrowTimeUnit) -> Self { + match unit { + ArrowTimeUnit::Second => ProofsTimeUnit::Second, + ArrowTimeUnit::Millisecond => ProofsTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => ProofsTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => ProofsTimeUnit::Nanosecond, + } + } +} + +impl TryFrom> for ProofsTimeZone { + type Error = &'static str; // Or use a more descriptive error type + + fn try_from(value: Option<&str>) -> Result { + match value { + Some(tz_str) => Tz::from_str(tz_str) + .map(ProofsTimeZone) + .map_err(|_| "Invalid timezone string"), + None => Ok(ProofsTimeZone(Tz::UTC)), // Default to UTC + } + } +} + +impl From<&ProofsTimeZone> for Arc { + fn from(timezone: &ProofsTimeZone) -> Self { + Arc::from(timezone.0.name()) + } +} diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs index 5254661e9..e595b9eee 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs +++ 
b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs @@ -60,7 +60,7 @@ fn compute_dory_commitment( } CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), - CommittableColumn::Timestamp(column) => compute_dory_commitment_impl(column, offset, setup), + CommittableColumn::Timestamp(_, _, column) => compute_dory_commitment_impl(column, offset, setup), } } diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs index 7402e06ae..c9a3f06d3 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs @@ -280,7 +280,9 @@ fn compute_dory_commitment( CommittableColumn::Scalar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), - CommittableColumn::Timestamp(column) => compute_dory_commitment_impl(column, offset, setup), + CommittableColumn::Timestamp(_, _, column) => { + compute_dory_commitment_impl(column, offset, setup) + } } } diff --git a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs index c3a62c153..d5d81b75b 100644 --- a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs +++ b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs @@ -65,9 +65,11 @@ pub fn filter_column_by_index<'a, S: Scalar>( *scale, alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), ), - Column::Timestamp(col) => { - Column::Timestamp(alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i]))) - } + Column::Timestamp(tu, tz, col) => Column::Timestamp( + *tu, + *tz, + 
alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), + ), } } diff --git a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs index 7c863ad6a..7c8b330a7 100644 --- a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs +++ b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs @@ -75,7 +75,7 @@ impl FilterResultExpr { Column::Scalar(_col) => todo!(), Column::Decimal75(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), Column::VarChar((_, scals)) => prover_evaluate_impl(builder, alloc, selection, scals), - Column::Timestamp(col) => prover_evaluate_impl(builder, alloc, selection, col), + Column::Timestamp(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), }; } diff --git a/crates/proof-of-sql/src/sql/ast/group_by_util.rs b/crates/proof-of-sql/src/sql/ast/group_by_util.rs index f79544e0e..2c206fcd0 100644 --- a/crates/proof-of-sql/src/sql/ast/group_by_util.rs +++ b/crates/proof-of-sql/src/sql/ast/group_by_util.rs @@ -114,7 +114,9 @@ pub(super) fn sum_aggregate_column_by_index_counts<'a, S: Scalar>( } Column::Scalar(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), Column::VarChar(_) => unimplemented!("Cannot sum varchar columns"), - Column::Timestamp(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), + Column::Timestamp(_, _, col) => { + sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes) + } } } @@ -176,7 +178,7 @@ pub(super) fn compare_indexes_by_columns( Column::Decimal75(_, _, _) => todo!("TODO: unimplemented"), Column::Scalar(col) => col[i].cmp(&col[j]), Column::VarChar((col, _)) => col[i].cmp(col[j]), - Column::Timestamp(col) => col[i].cmp(&col[j]), + Column::Timestamp(_, _, col) => col[i].cmp(&col[j]), }) .find(|&ord| ord != Ordering::Equal) .unwrap_or(Ordering::Equal) diff --git a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs 
index 401c19fcf..455d3ed1f 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs @@ -115,7 +115,9 @@ impl ProvableQueryResult { ColumnType::Scalar => decode_and_convert::(&self.data[offset..]), ColumnType::VarChar => decode_and_convert::<&str, S>(&self.data[offset..]), - ColumnType::Timestamp => decode_and_convert::(&self.data[offset..]), + ColumnType::Timestamp(_, _) => { + decode_and_convert::(&self.data[offset..]) + } }?; val += evaluation_vec[index as usize] * x; @@ -195,11 +197,11 @@ impl ProvableQueryResult { offset += num_read; Ok((field.name(), OwnedColumn::Decimal75(precision, scale, col))) } - ColumnType::Timestamp => { + ColumnType::Timestamp(tu, tz) => { let (col, num_read) = decode_multiple_elements(&self.data[offset..], n) .ok_or(QueryError::Overflow)?; offset += num_read; - Ok((field.name(), OwnedColumn::Int(col))) + Ok((field.name(), OwnedColumn::Timestamp(tu, tz, col))) } }) .collect::>()?, diff --git a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs index fc4d319a8..98e66d4e7 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs @@ -42,7 +42,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.num_bytes(selection), Column::Scalar(col) => col.num_bytes(selection), Column::VarChar((col, _)) => col.num_bytes(selection), - Column::Timestamp(col) => col.num_bytes(selection), + Column::Timestamp(_, _, col) => col.num_bytes(selection), } } @@ -56,7 +56,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.write(out, selection), Column::Scalar(col) => col.write(out, selection), Column::VarChar((col, _)) => col.write(out, selection), - Column::Timestamp(col) => col.write(out, selection), + Column::Timestamp(_, _, col) => col.write(out, selection), } } } 
diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs index 31ded2e8e..8883d7580 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs @@ -163,7 +163,7 @@ fn make_empty_query_result(result_fields: Vec) -> QueryR } ColumnType::Scalar => OwnedColumn::Scalar(vec![]), ColumnType::VarChar => OwnedColumn::VarChar(vec![]), - ColumnType::Timestamp => OwnedColumn::Timestamp(vec![]), + ColumnType::Timestamp(tu, tz) => OwnedColumn::Timestamp(tu, tz, vec![]), }, ) }) From 1ec82de9f43ad74f0a7c394bbfa30c1a40522e9d Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 21:01:21 -0700 Subject: [PATCH 04/24] feat: tests for timezone and timeunit conversions --- .../proof-of-sql/src/base/database/column.rs | 7 +- .../base/database/test_accessor_utility.rs | 22 +++-- .../proof-of-sql/src/base/time/timestamp.rs | 88 ++++++++++++++++++- .../dory/dory_commitment_helper_cpu.rs | 4 +- 4 files changed, 102 insertions(+), 19 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index f41aeb013..0fa4a84f4 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -274,10 +274,9 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::Timestamp(timeunit, timezone) => DataType::Timestamp( - ArrowTimeUnit::from(*timeunit), - Some(Arc::from(timezone)), - ), + ColumnType::Timestamp(timeunit, timezone) => { + DataType::Timestamp(ArrowTimeUnit::from(*timeunit), Some(Arc::from(timezone))) + } } } } diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index d912d756f..29cda7a18 100644 --- 
a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -125,18 +125,16 @@ pub fn make_random_test_accessor_data( // Create the correct timestamp array based on the time unit let timestamp_array: Arc = match tu { - ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from( - values.to_vec(), - )), - ProofsTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from( - values.to_vec(), - )), - ProofsTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from( - values.to_vec(), - )), - ProofsTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from( - values.to_vec(), - )), + ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.to_vec())), + ProofsTimeUnit::Millisecond => { + Arc::new(TimestampMillisecondArray::from(values.to_vec())) + } + ProofsTimeUnit::Microsecond => { + Arc::new(TimestampMicrosecondArray::from(values.to_vec())) + } + ProofsTimeUnit::Nanosecond => { + Arc::new(TimestampNanosecondArray::from(values.to_vec())) + } }; columns.push(timestamp_array); } diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs index 862f4927b..613ed9688 100644 --- a/crates/proof-of-sql/src/base/time/timestamp.rs +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -2,7 +2,7 @@ use arrow::datatypes::TimeUnit as ArrowTimeUnit; use chrono_tz::Tz; use core::fmt; use serde::{Deserialize, Serialize}; -use std::{str::FromStr, sync::Arc}; // Tz implements the TimeZone trait and provides access to IANA time zones +use std::{str::FromStr, sync::Arc}; #[derive(Debug, Clone, Deserialize, Serialize, Hash)] pub struct Timestamp { @@ -62,7 +62,7 @@ impl From for ProofsTimeUnit { } impl TryFrom> for ProofsTimeZone { - type Error = &'static str; // Or use a more descriptive error type + type Error = &'static str; fn try_from(value: Option<&str>) -> Result { match value { @@ -79,3 +79,87 @@ impl From<&ProofsTimeZone> 
for Arc { Arc::from(timezone.0.name()) } } + +#[cfg(test)] +mod tests { + use super::*; + use chrono_tz::Tz; + + #[test] + fn we_can_convert_valid_timezones() { + let examples = ["Europe/London", "America/New_York", "Asia/Tokyo", "UTC"]; + + for &tz_str in &examples { + let timezone = ProofsTimeZone::try_from(Some(tz_str)); + assert!(timezone.is_ok(), "Timezone should be valid: {}", tz_str); + assert_eq!( + timezone.unwrap().0, + Tz::from_str(tz_str).unwrap(), + "Timezone mismatch for {}", + tz_str + ); + } + } + + #[test] + fn we_cannot_convert_invalid_timezones() { + let invalid_tz_str = "Not/A_TimeZone"; + let result = ProofsTimeZone::try_from(Some(invalid_tz_str)); + assert!( + result.is_err(), + "Should return an error for invalid timezones" + ); + assert_eq!( + result.unwrap_err(), + "Invalid timezone string", + "Error message should indicate invalid timezone string" + ); + } + + #[test] + fn we_can_get_utc_with_none_timezone() { + let result = ProofsTimeZone::try_from(None); + assert!(result.is_ok(), "None should convert without error"); + assert_eq!(result.unwrap().0, Tz::UTC, "None should default to UTC"); + } + + #[test] + fn we_can_convert_from_arrow_time_units() { + assert_eq!( + ProofsTimeUnit::from(ArrowTimeUnit::Second), + ProofsTimeUnit::Second + ); + assert_eq!( + ProofsTimeUnit::from(ArrowTimeUnit::Millisecond), + ProofsTimeUnit::Millisecond + ); + assert_eq!( + ProofsTimeUnit::from(ArrowTimeUnit::Microsecond), + ProofsTimeUnit::Microsecond + ); + assert_eq!( + ProofsTimeUnit::from(ArrowTimeUnit::Nanosecond), + ProofsTimeUnit::Nanosecond + ); + } + + #[test] + fn we_can_convert_to_arrow_time_units() { + assert_eq!( + ArrowTimeUnit::from(ProofsTimeUnit::Second), + ArrowTimeUnit::Second + ); + assert_eq!( + ArrowTimeUnit::from(ProofsTimeUnit::Millisecond), + ArrowTimeUnit::Millisecond + ); + assert_eq!( + ArrowTimeUnit::from(ProofsTimeUnit::Microsecond), + ArrowTimeUnit::Microsecond + ); + assert_eq!( + 
ArrowTimeUnit::from(ProofsTimeUnit::Nanosecond), + ArrowTimeUnit::Nanosecond + ); + } +} diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs index e595b9eee..88f3be836 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs @@ -60,7 +60,9 @@ fn compute_dory_commitment( } CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), - CommittableColumn::Timestamp(_, _, column) => compute_dory_commitment_impl(column, offset, setup), + CommittableColumn::Timestamp(_, _, column) => { + compute_dory_commitment_impl(column, offset, setup) + } } } From 80237d53b53ef67778a9b03552591a7432ea3f58 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 21:45:17 -0700 Subject: [PATCH 05/24] feat: update typing and Scalar bounds --- .../arrow_array_to_column_conversion.rs | 70 ++++++++++- .../proof-of-sql/src/base/database/column.rs | 6 +- .../base/database/test_accessor_utility.rs | 2 +- crates/proof-of-sql/src/base/scalar/mod.rs | 1 - .../proof-of-sql/src/base/time/timestamp.rs | 112 ++++++++++++------ 5 files changed, 144 insertions(+), 47 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index 391ca5c4e..9ff069ffa 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,14 +1,20 @@ use super::scalar_and_i256_conversions::convert_i256_to_scalar; use crate::{ - base::{database::Column, math::decimal::Precision, scalar::Scalar}, + base::{ + database::Column, + math::decimal::Precision, + 
scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + }, sql::parse::ConversionError, }; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, - Int64Array, StringArray, + Int64Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, }, - datatypes::{i256, DataType}, + datatypes::{i256, DataType, TimeUnit as ArrowTimeUnit}, }; use bumpalo::Bump; use std::ops::Range; @@ -32,6 +38,9 @@ pub enum ArrowArrayToColumnConversionError { /// Variant for conversion errors #[error("conversion error: {0}")] ConversionError(#[from] ConversionError), + /// Variant for timezone conversion errors, i.e. invalid timezone + #[error("Timezone conversion failed: {0}")] + TimezoneConversionError(String), } /// This trait is used to provide utility functions to convert ArrayRefs into proof types (Column, Scalars, etc.) @@ -247,6 +256,61 @@ impl ArrayRefExt for ArrayRef { )) } } + // Handle all possible TimeStamp TimeUnit instances + DataType::Timestamp(time_unit, tz) => match time_unit { + ArrowTimeUnit::Second => { + if let Some(array) = self.as_any().downcast_ref::() { + Ok(Column::Timestamp( + ProofsTimeUnit::Second, + ProofsTimeZone::try_from(tz.clone())?, + array.values(), + )) + } else { + Err(ArrowArrayToColumnConversionError::UnsupportedType( + self.data_type().clone(), + )) + } + } + ArrowTimeUnit::Millisecond => { + if let Some(array) = self.as_any().downcast_ref::() { + Ok(Column::Timestamp( + ProofsTimeUnit::Millisecond, + ProofsTimeZone::try_from(tz.clone())?, + array.values(), + )) + } else { + Err(ArrowArrayToColumnConversionError::UnsupportedType( + self.data_type().clone(), + )) + } + } + ArrowTimeUnit::Microsecond => { + if let Some(array) = self.as_any().downcast_ref::() { + Ok(Column::Timestamp( + ProofsTimeUnit::Microsecond, + ProofsTimeZone::try_from(tz.clone())?, + array.values(), + )) + } else { + 
Err(ArrowArrayToColumnConversionError::UnsupportedType( + self.data_type().clone(), + )) + } + } + ArrowTimeUnit::Nanosecond => { + if let Some(array) = self.as_any().downcast_ref::() { + Ok(Column::Timestamp( + ProofsTimeUnit::Nanosecond, + ProofsTimeZone::try_from(tz.clone())?, + array.values(), + )) + } else { + Err(ArrowArrayToColumnConversionError::UnsupportedType( + self.data_type().clone(), + )) + } + } + }, DataType::Utf8 => { if let Some(array) = self.as_any().downcast_ref::() { let vals = alloc diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index 0fa4a84f4..b2c4e8a90 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -41,7 +41,7 @@ pub enum Column<'a, S: Scalar> { VarChar((&'a [&'a str], &'a [S])), /// Timestamp columns /// - the first element maps to the stored [`TimeUnit`] - /// - the second element maps to an optional timezone as a string + /// - the second element maps to a timezone /// - the third element maps to columns of timeunits since unix epoch Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } @@ -300,7 +300,7 @@ impl TryFrom for ColumnType { let timezone = match timezone_option { Some(tz_arc) => { - let tz_str = &*tz_arc; // Deref Arc to &str + let tz_str = &*tz_arc; // Dereference Arc to &str chrono_tz::Tz::from_str(tz_str) .map_err(|_| format!("Invalid timezone string: {}", tz_str))? 
} @@ -309,7 +309,7 @@ impl TryFrom for ColumnType { Ok(ColumnType::Timestamp( custom_time_unit, - ProofsTimeZone(timezone), + ProofsTimeZone::from(timezone), )) } DataType::Utf8 => Ok(ColumnType::VarChar), diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index 29cda7a18..0d2e6b721 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -119,7 +119,7 @@ pub fn make_random_test_accessor_data( ColumnType::Timestamp(tu, tz) => { column_fields.push(Field::new( *col_name, - DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.0.name()))), + DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.to_string()))), false, )); diff --git a/crates/proof-of-sql/src/base/scalar/mod.rs b/crates/proof-of-sql/src/base/scalar/mod.rs index 074b0c16b..9ff4f56b2 100644 --- a/crates/proof-of-sql/src/base/scalar/mod.rs +++ b/crates/proof-of-sql/src/base/scalar/mod.rs @@ -43,7 +43,6 @@ pub trait Scalar: + for<'a> std::convert::From<&'a i16> // Required for `Column` to implement `MultilinearExtension` + for<'a> std::convert::From<&'a i32> // Required for `Column` to implement `MultilinearExtension` + for<'a> std::convert::From<&'a i64> // Required for `Column` to implement `MultilinearExtension` - + for<'a> std::convert::From<&'a u64> // Required for `Column` to implement `MultilinearExtension` + for<'a> std::convert::From<&'a i128> // Required for `Column` to implement `MultilinearExtension` + std::convert::TryInto + std::convert::TryInto diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs index 613ed9688..7dd4384df 100644 --- a/crates/proof-of-sql/src/base/time/timestamp.rs +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -1,3 +1,4 @@ +use crate::base::database::ArrowArrayToColumnConversionError; use arrow::datatypes::TimeUnit as 
ArrowTimeUnit; use chrono_tz::Tz; use core::fmt; @@ -11,27 +12,8 @@ pub struct Timestamp { timezone: Tz, } -#[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize, Serialize, Hash)] -pub enum ProofsTimeUnit { - Second, - Millisecond, - Microsecond, - Nanosecond, -} - -impl fmt::Display for ProofsTimeUnit { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - ProofsTimeUnit::Second => write!(f, "Second"), - ProofsTimeUnit::Millisecond => write!(f, "Millisecond"), - ProofsTimeUnit::Microsecond => write!(f, "Microsecond"), - ProofsTimeUnit::Nanosecond => write!(f, "Nanosecond"), - } - } -} - #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize)] -pub struct ProofsTimeZone(pub Tz); +pub struct ProofsTimeZone(Tz); impl fmt::Display for ProofsTimeZone { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -50,6 +32,25 @@ impl From for ArrowTimeUnit { } } +#[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize, Serialize, Hash)] +pub enum ProofsTimeUnit { + Second, + Millisecond, + Microsecond, + Nanosecond, +} + +impl fmt::Display for ProofsTimeUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ProofsTimeUnit::Second => write!(f, "Second"), + ProofsTimeUnit::Millisecond => write!(f, "Millisecond"), + ProofsTimeUnit::Microsecond => write!(f, "Microsecond"), + ProofsTimeUnit::Nanosecond => write!(f, "Nanosecond"), + } + } +} + impl From for ProofsTimeUnit { fn from(unit: ArrowTimeUnit) -> Self { match unit { @@ -61,12 +62,12 @@ impl From for ProofsTimeUnit { } } -impl TryFrom> for ProofsTimeZone { - type Error = &'static str; +impl TryFrom>> for ProofsTimeZone { + type Error = &'static str; // Explicitly state the error type - fn try_from(value: Option<&str>) -> Result { + fn try_from(value: Option>) -> Result { match value { - Some(tz_str) => Tz::from_str(tz_str) + Some(arc_str) => Tz::from_str(&arc_str) .map(ProofsTimeZone) .map_err(|_| "Invalid timezone string"), None => 
Ok(ProofsTimeZone(Tz::UTC)), // Default to UTC @@ -80,17 +81,32 @@ impl From<&ProofsTimeZone> for Arc { } } +impl From<&'static str> for ArrowArrayToColumnConversionError { + fn from(error: &'static str) -> Self { + ArrowArrayToColumnConversionError::TimezoneConversionError(error.to_string()) + } +} + +impl From for ProofsTimeZone { + fn from(tz: Tz) -> Self { + ProofsTimeZone(tz) + } +} + #[cfg(test)] mod tests { use super::*; use chrono_tz::Tz; #[test] - fn we_can_convert_valid_timezones() { - let examples = ["Europe/London", "America/New_York", "Asia/Tokyo", "UTC"]; - - for &tz_str in &examples { - let timezone = ProofsTimeZone::try_from(Some(tz_str)); + fn valid_timezones_convert_correctly() { + let valid_timezones = ["Europe/London", "America/New_York", "Asia/Tokyo", "UTC"]; + + for tz_str in &valid_timezones { + let arc_tz = Arc::new(tz_str.to_string()); + // Convert Arc to Arc by dereferencing to &str then creating a new Arc + let arc_tz_str: Arc = Arc::from(&**arc_tz); + let timezone = ProofsTimeZone::try_from(Some(arc_tz_str)); assert!(timezone.is_ok(), "Timezone should be valid: {}", tz_str); assert_eq!( timezone.unwrap().0, @@ -102,22 +118,40 @@ mod tests { } #[test] - fn we_cannot_convert_invalid_timezones() { - let invalid_tz_str = "Not/A_TimeZone"; - let result = ProofsTimeZone::try_from(Some(invalid_tz_str)); + fn test_edge_timezone_strings() { + let edge_timezones = ["Etc/GMT+12", "Etc/GMT-14", "America/Argentina/Ushuaia"]; + for tz_str in &edge_timezones { + let arc_tz = Arc::from(*tz_str); + let result = ProofsTimeZone::try_from(Some(arc_tz)); + assert!(result.is_ok(), "Edge timezone should be valid: {}", tz_str); + assert_eq!( + result.unwrap().0, + Tz::from_str(tz_str).unwrap(), + "Mismatch for edge timezone {}", + tz_str + ); + } + } + + #[test] + fn test_empty_timezone_string() { + let empty_tz = Arc::from(""); + let result = ProofsTimeZone::try_from(Some(empty_tz)); + assert!(result.is_err(), "Empty timezone string should fail"); + } + + 
#[test] + fn test_unicode_timezone_strings() { + let unicode_tz = Arc::from("Europe/Paris\u{00A0}"); // Non-breaking space character + let result = ProofsTimeZone::try_from(Some(unicode_tz)); assert!( result.is_err(), - "Should return an error for invalid timezones" - ); - assert_eq!( - result.unwrap_err(), - "Invalid timezone string", - "Error message should indicate invalid timezone string" + "Unicode characters should not be valid in timezone strings" ); } #[test] - fn we_can_get_utc_with_none_timezone() { + fn test_null_option() { let result = ProofsTimeZone::try_from(None); assert!(result.is_ok(), "None should convert without error"); assert_eq!(result.unwrap().0, Tz::UTC, "None should default to UTC"); From aae72a7be86b96b873116e2b3d5e77b44222d411 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 23:10:07 -0700 Subject: [PATCH 06/24] feat: support TimeStamp --- .../src/base/commitment/column_bounds.rs | 3 + .../commitment/column_commitment_metadata.rs | 5 ++ .../src/base/commitment/committable_column.rs | 5 +- .../arrow_array_to_column_conversion.rs | 18 ++++ .../proof-of-sql/src/base/database/column.rs | 4 +- .../src/base/database/literal_value.rs | 12 ++- .../database/owned_and_arrow_conversions.rs | 76 ++++++++++++++++- .../src/base/database/owned_column.rs | 1 + .../src/base/database/owned_table_utility.rs | 37 +++++++- .../base/database/test_accessor_utility.rs | 1 - crates/proof-of-sql/src/base/mod.rs | 3 +- crates/proof-of-sql/src/base/time/mod.rs | 3 +- .../proof-of-sql/src/base/time/timestamp.rs | 85 ++++++++++++------- 13 files changed, 214 insertions(+), 39 deletions(-) diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 672b280fd..76619e4ea 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -246,6 +246,9 @@ impl ColumnBounds { (ColumnBounds::BigInt(bounds_a), 
ColumnBounds::BigInt(bounds_b)) => { Ok(ColumnBounds::BigInt(bounds_a.union(bounds_b))) } + (ColumnBounds::Timestamp(bounds_a), ColumnBounds::Timestamp(bounds_b)) => { + Ok(ColumnBounds::Timestamp(bounds_a.union(bounds_b))) + } (ColumnBounds::Int128(bounds_a), ColumnBounds::Int128(bounds_b)) => { Ok(ColumnBounds::Int128(bounds_a.union(bounds_b))) } diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index 6210c96d3..dacad37c3 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++ b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -40,6 +40,7 @@ impl ColumnCommitmentMetadata { | (ColumnType::Int, ColumnBounds::Int(_)) | (ColumnType::BigInt, ColumnBounds::BigInt(_)) | (ColumnType::Int128, ColumnBounds::Int128(_)) + | (ColumnType::Timestamp(_, _), ColumnBounds::Timestamp(_)) | ( ColumnType::Boolean | ColumnType::VarChar @@ -72,6 +73,10 @@ impl ColumnCommitmentMetadata { BoundsInner::try_new(i64::MIN, i64::MAX) .expect("i64::MIN and i64::MAX are valid bounds for BigInt"), )), + ColumnType::Timestamp(_, _) => ColumnBounds::Timestamp(super::Bounds::Bounded( + BoundsInner::try_new(i64::MIN, i64::MAX) + .expect("i64::MIN and i64::MAX are valid bounds for TimeStamp"), + )), ColumnType::Int128 => ColumnBounds::Int128(super::Bounds::Bounded( BoundsInner::try_new(i128::MIN, i128::MAX) .expect("i128::MIN and i128::MAX are valid bounds for Int128"), diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index 71e0fa35d..9c47c3eb2 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -38,7 +38,7 @@ pub enum CommittableColumn<'a> { Scalar(Vec<[u64; 4]>), /// Column of limbs for committing to scalars, hashed from a VarChar column. 
VarChar(Vec<[u64; 4]>), - /// Borrowed Timestamp column, mapped to `u64`. + /// Borrowed Timestamp column, mapped to `i64`. Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } @@ -149,11 +149,14 @@ impl<'a> From<&'a [i32]> for CommittableColumn<'a> { CommittableColumn::Int(value) } } + +// TODO: make sure this does not conflict with TimeStamp impl<'a> From<&'a [i64]> for CommittableColumn<'a> { fn from(value: &'a [i64]) -> Self { CommittableColumn::BigInt(value) } } + impl<'a> From<&'a [i128]> for CommittableColumn<'a> { fn from(value: &'a [i128]) -> Self { CommittableColumn::Int128(value) diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index 9ff069ffa..b730af3c1 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -136,6 +136,24 @@ impl ArrayRefExt for ArrayRef { }) .collect() }), + DataType::Timestamp(time_unit, _) => match time_unit { + ArrowTimeUnit::Second => self + .as_any() + .downcast_ref::() + .map(|array| array.values().iter().map(|v| Ok((*v).into())).collect()), + ArrowTimeUnit::Millisecond => self + .as_any() + .downcast_ref::() + .map(|array| array.values().iter().map(|v| Ok((*v).into())).collect()), + ArrowTimeUnit::Microsecond => self + .as_any() + .downcast_ref::() + .map(|array| array.values().iter().map(|v| Ok((*v).into())).collect()), + ArrowTimeUnit::Nanosecond => self + .as_any() + .downcast_ref::() + .map(|array| array.values().iter().map(|v| Ok((*v).into())).collect()), + }, _ => None, }; diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index b2c4e8a90..97a2b7843 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -111,6 +111,9 @@ impl<'a, S: Scalar> Column<'a, S> { *scale, 
alloc.alloc_slice_fill_copy(length, *value), ), + LiteralValue::TimeStamp(tu, tz, value) => { + Column::Timestamp(*tu, *tz, alloc.alloc_slice_fill_copy(length, *value)) + } LiteralValue::VarChar((string, scalar)) => Column::VarChar(( alloc.alloc_slice_fill_with(length, |_| alloc.alloc_str(string) as &str), alloc.alloc_slice_fill_copy(length, *scalar), @@ -306,7 +309,6 @@ impl TryFrom for ColumnType { } None => chrono_tz::Tz::UTC, // Default to UTC if None }; - Ok(ColumnType::Timestamp( custom_time_unit, ProofsTimeZone::from(timezone), diff --git a/crates/proof-of-sql/src/base/database/literal_value.rs b/crates/proof-of-sql/src/base/database/literal_value.rs index 3d9345685..205a66614 100644 --- a/crates/proof-of-sql/src/base/database/literal_value.rs +++ b/crates/proof-of-sql/src/base/database/literal_value.rs @@ -1,4 +1,9 @@ -use crate::base::{database::ColumnType, math::decimal::Precision, scalar::Scalar}; +use crate::base::{ + database::ColumnType, + math::decimal::Precision, + scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, +}; use serde::{Deserialize, Serialize}; /// Represents a literal value. 
@@ -29,6 +34,9 @@ pub enum LiteralValue { Decimal75(Precision, i8, S), /// Scalar literals Scalar(S), + /// TimeStamp defined over a unit (s, ms, ns, etc) and timezone with backing store + /// mapped to i64, which is time units since unix epoch + TimeStamp(ProofsTimeUnit, ProofsTimeZone, i64), } impl LiteralValue { @@ -43,6 +51,7 @@ impl LiteralValue { Self::Int128(_) => ColumnType::Int128, Self::Scalar(_) => ColumnType::Scalar, Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale), + Self::TimeStamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), } } @@ -57,6 +66,7 @@ impl LiteralValue { Self::Int128(i) => i.into(), Self::Decimal75(_, _, s) => *s, Self::Scalar(scalar) => *scalar, + Self::TimeStamp(_, _, time) => time.into(), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index 4465671a3..210f89584 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -20,7 +20,7 @@ use crate::base::{ }, math::decimal::Precision, scalar::Scalar, - time::timestamp::ProofsTimeUnit, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, }; use arrow::{ array::{ @@ -28,7 +28,7 @@ use arrow::{ Int64Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, - datatypes::{i256, DataType, Schema, SchemaRef}, + datatypes::{i256, DataType, Schema, SchemaRef, TimeUnit as ArrowTimeUnit}, error::ArrowError, record_batch::RecordBatch, }; @@ -56,6 +56,12 @@ pub enum OwnedArrowConversionError { /// This error occurs when trying to convert from an Arrow array with nulls. #[error("null values are not supported in OwnedColumn yet")] NullNotSupportedYet, + /// This error occurs when trying to convert from an unsupported timestamp unit. 
+ #[error("unsupported timestamp unit: {0}")] + UnsupportedTimestampUnit(String), + /// This error occurs when trying to convert from an invalid timezone string. + #[error("invalid timezone string: {0}")] + InvalidTimezone(String), // New error variant for timezone strings } impl From> for ArrayRef { @@ -182,6 +188,72 @@ impl TryFrom<&ArrayRef> for OwnedColumn { .map(|s| s.unwrap().to_string()) .collect(), )), + DataType::Timestamp(time_unit, timezone) => match time_unit { + ArrowTimeUnit::Second => { + let array = value + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OwnedArrowConversionError::UnsupportedTimestampUnit( + "Second".to_string(), + ) + })?; + let timestamps = array.values().iter().copied().collect::>(); + Ok(OwnedColumn::Timestamp( + ProofsTimeUnit::Second, + ProofsTimeZone::try_from(timezone.clone())?, + timestamps, + )) + } + ArrowTimeUnit::Millisecond => { + let array = value + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OwnedArrowConversionError::UnsupportedTimestampUnit( + "Millisecond".to_string(), + ) + })?; + let timestamps = array.values().iter().copied().collect::>(); + Ok(OwnedColumn::Timestamp( + ProofsTimeUnit::Millisecond, + ProofsTimeZone::try_from(timezone.clone())?, + timestamps, + )) + } + ArrowTimeUnit::Microsecond => { + let array = value + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OwnedArrowConversionError::UnsupportedTimestampUnit( + "Microsecond".to_string(), + ) + })?; + let timestamps = array.values().iter().copied().collect::>(); + Ok(OwnedColumn::Timestamp( + ProofsTimeUnit::Microsecond, + ProofsTimeZone::try_from(timezone.clone())?, + timestamps, + )) + } + ArrowTimeUnit::Nanosecond => { + let array = value + .as_any() + .downcast_ref::() + .ok_or_else(|| { + OwnedArrowConversionError::UnsupportedTimestampUnit( + "Nanosecond".to_string(), + ) + })?; + let timestamps = array.values().iter().copied().collect::>(); + Ok(OwnedColumn::Timestamp( + ProofsTimeUnit::Nanosecond, + 
ProofsTimeZone::try_from(timezone.clone())?, + timestamps, + )) + } + }, &data_type => Err(OwnedArrowConversionError::UnsupportedType( data_type.clone(), )), diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index 1b9190860..e4f5dccb2 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -94,6 +94,7 @@ impl FromIterator for OwnedColumn { Self::Int(Vec::from_iter(iter)) } } +// TODO: does this conflict with TimeStamp? impl FromIterator for OwnedColumn { fn from_iter>(iter: T) -> Self { Self::BigInt(Vec::from_iter(iter)) diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index bf36acbbf..8ab2833b3 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -14,7 +14,10 @@ //! ]); //! ``` use super::{OwnedColumn, OwnedTable}; -use crate::base::scalar::Scalar; +use crate::base::{ + scalar::Scalar, + time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, +}; use core::ops::Deref; use proof_of_sql_parser::Identifier; @@ -195,3 +198,35 @@ pub fn decimal75( ), ) } + +/// Creates a (Identifier, OwnedColumn) pair for a timestamp column. +/// This is primarily intended for use in conjunction with [owned_table]. +/// +/// # Parameters +/// - `name`: The name of the column. +/// - `time_unit`: The time unit of the timestamps. +/// - `timezone`: The timezone for the timestamps. +/// - `data`: The data for the column, provided as an iterator over `i64` values representing time since the unix epoch. 
+/// +/// # Example +/// ``` +/// use proof_of_sql::base::{database::owned_table_utility::*, +/// scalar::Curve25519Scalar, +/// time::timestamp::{ProofsTimeUnit, ProofsTimeZone}}; +/// use chrono_tz::Europe::London; +/// +/// let result = owned_table::([ +/// timestamp("event_time", ProofsTimeUnit::Second, ProofsTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), +/// ]); +/// ``` +pub fn timestamp( + name: impl Deref, + time_unit: ProofsTimeUnit, + timezone: ProofsTimeZone, + data: impl IntoIterator, +) -> (Identifier, OwnedColumn) { + ( + name.parse().unwrap(), + OwnedColumn::Timestamp(time_unit, timezone, data.into_iter().collect()), + ) +} diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index 0d2e6b721..abd740adf 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -122,7 +122,6 @@ pub fn make_random_test_accessor_data( DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.to_string()))), false, )); - // Create the correct timestamp array based on the time unit let timestamp_array: Arc = match tu { ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.to_vec())), diff --git a/crates/proof-of-sql/src/base/mod.rs b/crates/proof-of-sql/src/base/mod.rs index 893c22b06..72443e8fd 100644 --- a/crates/proof-of-sql/src/base/mod.rs +++ b/crates/proof-of-sql/src/base/mod.rs @@ -11,4 +11,5 @@ pub mod scalar; mod serialize; pub(crate) use serialize::{impl_serde_for_ark_serde_checked, impl_serde_for_ark_serde_unchecked}; pub(crate) mod slice_ops; -pub(crate) mod time; +/// Stores all functionality relevant to time +pub mod time; diff --git a/crates/proof-of-sql/src/base/time/mod.rs b/crates/proof-of-sql/src/base/time/mod.rs index 9bef4a59b..2775c0048 100644 --- a/crates/proof-of-sql/src/base/time/mod.rs +++ b/crates/proof-of-sql/src/base/time/mod.rs @@ 
-1 +1,2 @@ -pub(crate) mod timestamp; +/// Stores all functionality relevant to timestamps +pub mod timestamp; diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs index 7dd4384df..09802e9eb 100644 --- a/crates/proof-of-sql/src/base/time/timestamp.rs +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -1,10 +1,13 @@ -use crate::base::database::ArrowArrayToColumnConversionError; +use crate::base::database::{ArrowArrayToColumnConversionError, OwnedArrowConversionError}; use arrow::datatypes::TimeUnit as ArrowTimeUnit; use chrono_tz::Tz; use core::fmt; use serde::{Deserialize, Serialize}; use std::{str::FromStr, sync::Arc}; +/// A postgresql-like `TimeStamp` type. It is defined over +/// a [`TimeUnit`], which is a signed count of units either +/// after or before the [Unix epoch](https://en.wikipedia.org/wiki/Unix_time). #[derive(Debug, Clone, Deserialize, Serialize, Hash)] pub struct Timestamp { time: i64, @@ -12,34 +15,75 @@ pub struct Timestamp { timezone: Tz, } +/// A typed TimeZone for a [`TimeStamp`]. It is optionally +/// used to define a timezone other than UTC for a new TimeStamp. 
+/// It exists as a wrapper around chrono-tz because chrono-tz does +/// not implement uniform bit distribution #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct ProofsTimeZone(Tz); +impl ProofsTimeZone { + /// Create a new ProofsTimeZone from a chrono TimeZone + pub fn new(tz: Tz) -> Self { + ProofsTimeZone(tz) + } +} + +impl From<&ProofsTimeZone> for Arc { + fn from(timezone: &ProofsTimeZone) -> Self { + Arc::from(timezone.0.name()) + } +} + +impl From for ProofsTimeZone { + fn from(tz: Tz) -> Self { + ProofsTimeZone(tz) + } +} + impl fmt::Display for ProofsTimeZone { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) } } -impl From for ArrowTimeUnit { - fn from(unit: ProofsTimeUnit) -> Self { - match unit { - ProofsTimeUnit::Second => ArrowTimeUnit::Second, - ProofsTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, - ProofsTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, - ProofsTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, +impl TryFrom>> for ProofsTimeZone { + type Error = &'static str; // Explicitly state the error type + + fn try_from(value: Option>) -> Result { + match value { + Some(arc_str) => Tz::from_str(&arc_str) + .map(ProofsTimeZone) + .map_err(|_| "Invalid timezone string"), + None => Ok(ProofsTimeZone(Tz::UTC)), // Default to UTC } } } +/// Specifies different units of time measurement relative to the Unix epoch. #[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize, Serialize, Hash)] pub enum ProofsTimeUnit { + /// Represents a time unit of one second. Second, + /// Represents a time unit of one millisecond (1/1,000 of a second). Millisecond, + /// Represents a time unit of one microsecond (1/1,000,000 of a second). Microsecond, + /// Represents a time unit of one nanosecond (1/1,000,000,000 of a second). 
Nanosecond, } +impl From for ArrowTimeUnit { + fn from(unit: ProofsTimeUnit) -> Self { + match unit { + ProofsTimeUnit::Second => ArrowTimeUnit::Second, + ProofsTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + ProofsTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + ProofsTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + } + } +} + impl fmt::Display for ProofsTimeUnit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -62,22 +106,9 @@ impl From for ProofsTimeUnit { } } -impl TryFrom>> for ProofsTimeZone { - type Error = &'static str; // Explicitly state the error type - - fn try_from(value: Option>) -> Result { - match value { - Some(arc_str) => Tz::from_str(&arc_str) - .map(ProofsTimeZone) - .map_err(|_| "Invalid timezone string"), - None => Ok(ProofsTimeZone(Tz::UTC)), // Default to UTC - } - } -} - -impl From<&ProofsTimeZone> for Arc { - fn from(timezone: &ProofsTimeZone) -> Self { - Arc::from(timezone.0.name()) +impl From<&'static str> for OwnedArrowConversionError { + fn from(error: &'static str) -> Self { + OwnedArrowConversionError::InvalidTimezone(error.to_string()) } } @@ -87,12 +118,6 @@ impl From<&'static str> for ArrowArrayToColumnConversionError { } } -impl From for ProofsTimeZone { - fn from(tz: Tz) -> Self { - ProofsTimeZone(tz) - } -} - #[cfg(test)] mod tests { use super::*; From a1870fdc81e4cdf1c762a366c14c12d74391bee8 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 23:34:50 -0700 Subject: [PATCH 07/24] fix: rename to align with postgres --- .../src/base/commitment/column_bounds.rs | 10 +++---- .../commitment/column_commitment_metadata.rs | 4 +-- .../src/base/commitment/committable_column.rs | 14 +++++----- .../arrow_array_to_column_conversion.rs | 8 +++--- .../proof-of-sql/src/base/database/column.rs | 26 ++++++++++--------- .../src/base/database/literal_value.rs | 6 ++--- .../database/owned_and_arrow_conversions.rs | 10 +++---- .../src/base/database/owned_column.rs | 8 +++--- 
.../database/owned_table_test_accessor.rs | 2 +- .../src/base/database/owned_table_utility.rs | 4 +-- .../base/database/test_accessor_utility.rs | 2 +- .../base/polynomial/multilinear_extension.rs | 8 +++--- .../proof-of-sql/src/base/time/timestamp.rs | 2 +- .../dory/dory_commitment_helper_cpu.rs | 2 +- .../dory/dory_commitment_helper_gpu.rs | 2 +- .../src/sql/ast/dense_filter_util.rs | 2 +- .../src/sql/ast/filter_result_expr.rs | 2 +- .../proof-of-sql/src/sql/ast/group_by_util.rs | 4 +-- .../src/sql/proof/provable_query_result.rs | 6 ++--- .../src/sql/proof/provable_result_column.rs | 4 +-- .../src/sql/proof/verifiable_query_result.rs | 2 +- 21 files changed, 65 insertions(+), 63 deletions(-) diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 76619e4ea..8b44e8d9f 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -208,7 +208,7 @@ pub enum ColumnBounds { /// The bounds of an Int128 column. Int128(Bounds), /// The bounds of a Timestamp column. 
- Timestamp(Bounds), + TimestampTZ(Bounds), } impl ColumnBounds { @@ -221,8 +221,8 @@ impl ColumnBounds { CommittableColumn::Int(ints) => ColumnBounds::Int(Bounds::from_iter(*ints)), CommittableColumn::BigInt(ints) => ColumnBounds::BigInt(Bounds::from_iter(*ints)), CommittableColumn::Int128(ints) => ColumnBounds::Int128(Bounds::from_iter(*ints)), - CommittableColumn::Timestamp(_, _, times) => { - ColumnBounds::Timestamp(Bounds::from_iter(*times)) + CommittableColumn::TimestampTZ(_, _, times) => { + ColumnBounds::TimestampTZ(Bounds::from_iter(*times)) } CommittableColumn::Boolean(_) | CommittableColumn::Decimal75(_, _, _) @@ -246,8 +246,8 @@ impl ColumnBounds { (ColumnBounds::BigInt(bounds_a), ColumnBounds::BigInt(bounds_b)) => { Ok(ColumnBounds::BigInt(bounds_a.union(bounds_b))) } - (ColumnBounds::Timestamp(bounds_a), ColumnBounds::Timestamp(bounds_b)) => { - Ok(ColumnBounds::Timestamp(bounds_a.union(bounds_b))) + (ColumnBounds::TimestampTZ(bounds_a), ColumnBounds::TimestampTZ(bounds_b)) => { + Ok(ColumnBounds::TimestampTZ(bounds_a.union(bounds_b))) } (ColumnBounds::Int128(bounds_a), ColumnBounds::Int128(bounds_b)) => { Ok(ColumnBounds::Int128(bounds_a.union(bounds_b))) diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index dacad37c3..a9473ed53 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++ b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -40,7 +40,7 @@ impl ColumnCommitmentMetadata { | (ColumnType::Int, ColumnBounds::Int(_)) | (ColumnType::BigInt, ColumnBounds::BigInt(_)) | (ColumnType::Int128, ColumnBounds::Int128(_)) - | (ColumnType::Timestamp(_, _), ColumnBounds::Timestamp(_)) + | (ColumnType::TimestampTZ(_, _), ColumnBounds::TimestampTZ(_)) | ( ColumnType::Boolean | ColumnType::VarChar @@ -73,7 +73,7 @@ impl ColumnCommitmentMetadata { BoundsInner::try_new(i64::MIN, i64::MAX) 
.expect("i64::MIN and i64::MAX are valid bounds for BigInt"), )), - ColumnType::Timestamp(_, _) => ColumnBounds::Timestamp(super::Bounds::Bounded( + ColumnType::TimestampTZ(_, _) => ColumnBounds::TimestampTZ(super::Bounds::Bounded( BoundsInner::try_new(i64::MIN, i64::MAX) .expect("i64::MIN and i64::MAX are valid bounds for TimeStamp"), )), diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index 9c47c3eb2..3ce212868 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -38,8 +38,8 @@ pub enum CommittableColumn<'a> { Scalar(Vec<[u64; 4]>), /// Column of limbs for committing to scalars, hashed from a VarChar column. VarChar(Vec<[u64; 4]>), - /// Borrowed Timestamp column, mapped to `i64`. - Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), + /// Borrowed Timestamp column with Timezone, mapped to `i64`. + TimestampTZ(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } impl<'a> CommittableColumn<'a> { @@ -54,7 +54,7 @@ impl<'a> CommittableColumn<'a> { CommittableColumn::Scalar(col) => col.len(), CommittableColumn::VarChar(col) => col.len(), CommittableColumn::Boolean(col) => col.len(), - CommittableColumn::Timestamp(_, _, col) => col.len(), + CommittableColumn::TimestampTZ(_, _, col) => col.len(), } } @@ -82,7 +82,7 @@ impl<'a> From<&CommittableColumn<'a>> for ColumnType { CommittableColumn::Scalar(_) => ColumnType::Scalar, CommittableColumn::VarChar(_) => ColumnType::VarChar, CommittableColumn::Boolean(_) => ColumnType::Boolean, - CommittableColumn::Timestamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), + CommittableColumn::TimestampTZ(tu, tz, _) => ColumnType::TimestampTZ(*tu, *tz), } } } @@ -104,7 +104,7 @@ impl<'a, S: Scalar> From<&Column<'a, S>> for CommittableColumn<'a> { let as_limbs: Vec<_> = scalars.iter().map(RefInto::<[u64; 4]>::ref_into).collect(); 
CommittableColumn::VarChar(as_limbs) } - Column::Timestamp(tu, tz, times) => CommittableColumn::Timestamp(*tu, *tz, times), + Column::TimestampTZ(tu, tz, times) => CommittableColumn::TimestampTZ(*tu, *tz, times), } } } @@ -134,7 +134,7 @@ impl<'a, S: Scalar> From<&'a OwnedColumn> for CommittableColumn<'a> { .map(Into::<[u64; 4]>::into) .collect(), ), - OwnedColumn::Timestamp(_, _, times) => (times as &[_]).into(), + OwnedColumn::TimestampTZ(_, _, times) => (times as &[_]).into(), } } } @@ -185,7 +185,7 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { CommittableColumn::Scalar(limbs) => Sequence::from(limbs), CommittableColumn::VarChar(limbs) => Sequence::from(limbs), CommittableColumn::Boolean(bools) => Sequence::from(*bools), - CommittableColumn::Timestamp(_, _, times) => Sequence::from(*times), + CommittableColumn::TimestampTZ(_, _, times) => Sequence::from(*times), } } } diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index b730af3c1..c24b91918 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -278,7 +278,7 @@ impl ArrayRefExt for ArrayRef { DataType::Timestamp(time_unit, tz) => match time_unit { ArrowTimeUnit::Second => { if let Some(array) = self.as_any().downcast_ref::() { - Ok(Column::Timestamp( + Ok(Column::TimestampTZ( ProofsTimeUnit::Second, ProofsTimeZone::try_from(tz.clone())?, array.values(), @@ -291,7 +291,7 @@ impl ArrayRefExt for ArrayRef { } ArrowTimeUnit::Millisecond => { if let Some(array) = self.as_any().downcast_ref::() { - Ok(Column::Timestamp( + Ok(Column::TimestampTZ( ProofsTimeUnit::Millisecond, ProofsTimeZone::try_from(tz.clone())?, array.values(), @@ -304,7 +304,7 @@ impl ArrayRefExt for ArrayRef { } ArrowTimeUnit::Microsecond => { if let Some(array) = self.as_any().downcast_ref::() { - 
Ok(Column::Timestamp( + Ok(Column::TimestampTZ( ProofsTimeUnit::Microsecond, ProofsTimeZone::try_from(tz.clone())?, array.values(), @@ -317,7 +317,7 @@ impl ArrayRefExt for ArrayRef { } ArrowTimeUnit::Nanosecond => { if let Some(array) = self.as_any().downcast_ref::() { - Ok(Column::Timestamp( + Ok(Column::TimestampTZ( ProofsTimeUnit::Nanosecond, ProofsTimeZone::try_from(tz.clone())?, array.values(), diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index 97a2b7843..c99981d86 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -39,11 +39,11 @@ pub enum Column<'a, S: Scalar> { /// - the first element maps to the str values. /// - the second element maps to the str hashes (see [crate::base::scalar::Scalar]). VarChar((&'a [&'a str], &'a [S])), - /// Timestamp columns + /// Timestamp columns with timezone /// - the first element maps to the stored [`TimeUnit`] /// - the second element maps to a timezone /// - the third element maps to columns of timeunits since unix epoch - Timestamp(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), + TimestampTZ(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), } impl<'a, S: Scalar> Column<'a, S> { @@ -58,7 +58,9 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(_) => ColumnType::Int128, Self::Scalar(_) => ColumnType::Scalar, Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale), - Self::Timestamp(time_unit, timezone, _) => ColumnType::Timestamp(*time_unit, *timezone), + Self::TimestampTZ(time_unit, timezone, _) => { + ColumnType::TimestampTZ(*time_unit, *timezone) + } } } /// Returns the length of the column. 
@@ -75,7 +77,7 @@ impl<'a, S: Scalar> Column<'a, S> { Self::Int128(col) => col.len(), Self::Scalar(col) => col.len(), Self::Decimal75(_, _, col) => col.len(), - Self::Timestamp(_, _, col) => col.len(), + Self::TimestampTZ(_, _, col) => col.len(), } } /// Returns `true` if the column has no elements. @@ -111,8 +113,8 @@ impl<'a, S: Scalar> Column<'a, S> { *scale, alloc.alloc_slice_fill_copy(length, *value), ), - LiteralValue::TimeStamp(tu, tz, value) => { - Column::Timestamp(*tu, *tz, alloc.alloc_slice_fill_copy(length, *value)) + LiteralValue::TimeStampTZ(tu, tz, value) => { + Column::TimestampTZ(*tu, *tz, alloc.alloc_slice_fill_copy(length, *value)) } LiteralValue::VarChar((string, scalar)) => Column::VarChar(( alloc.alloc_slice_fill_with(length, |_| alloc.alloc_str(string) as &str), @@ -166,7 +168,7 @@ impl<'a, S: Scalar> Column<'a, S> { .par_iter() .map(|s| *s * scale_factor) .collect::>(), - Self::Timestamp(_, _, col) => col + Self::TimestampTZ(_, _, col) => col .par_iter() .map(|i| S::from(i) * scale_factor) .collect::>(), @@ -213,7 +215,7 @@ pub enum ColumnType { Decimal75(Precision, i8), /// Mapped to i64 #[serde(alias = "TIMESTAMP", alias = "timestamp")] - Timestamp(ProofsTimeUnit, ProofsTimeZone), + TimestampTZ(ProofsTimeUnit, ProofsTimeZone), } impl ColumnType { @@ -244,7 +246,7 @@ impl ColumnType { Self::SmallInt => Some(5_u8), Self::Int => Some(10_u8), Self::BigInt => Some(19_u8), - Self::Timestamp(_, _) => Some(19_u8), + Self::TimestampTZ(_, _) => Some(19_u8), Self::Int128 => Some(39_u8), Self::Decimal75(precision, _) => Some(precision.value()), // Scalars are not in database & are only used for typeless comparisons for testing so we return 0 @@ -277,7 +279,7 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::Timestamp(timeunit, timezone) => { + ColumnType::TimestampTZ(timeunit, timezone) => { 
DataType::Timestamp(ArrowTimeUnit::from(*timeunit), Some(Arc::from(timezone))) } } @@ -309,7 +311,7 @@ impl TryFrom for ColumnType { } None => chrono_tz::Tz::UTC, // Default to UTC if None }; - Ok(ColumnType::Timestamp( + Ok(ColumnType::TimestampTZ( custom_time_unit, ProofsTimeZone::from(timezone), )) @@ -338,7 +340,7 @@ impl std::fmt::Display for ColumnType { } ColumnType::VarChar => write!(f, "VARCHAR"), ColumnType::Scalar => write!(f, "SCALAR"), - ColumnType::Timestamp(timeunit, timezone) => write!( + ColumnType::TimestampTZ(timeunit, timezone) => write!( f, "TIMESTAMP(TIMEUNIT: {:?}, TIMEZONE: {timeunit})", timezone diff --git a/crates/proof-of-sql/src/base/database/literal_value.rs b/crates/proof-of-sql/src/base/database/literal_value.rs index 205a66614..4b99c3c25 100644 --- a/crates/proof-of-sql/src/base/database/literal_value.rs +++ b/crates/proof-of-sql/src/base/database/literal_value.rs @@ -36,7 +36,7 @@ pub enum LiteralValue { Scalar(S), /// TimeStamp defined over a unit (s, ms, ns, etc) and timezone with backing store /// mapped to i64, which is time units since unix epoch - TimeStamp(ProofsTimeUnit, ProofsTimeZone, i64), + TimeStampTZ(ProofsTimeUnit, ProofsTimeZone, i64), } impl LiteralValue { @@ -51,7 +51,7 @@ impl LiteralValue { Self::Int128(_) => ColumnType::Int128, Self::Scalar(_) => ColumnType::Scalar, Self::Decimal75(precision, scale, _) => ColumnType::Decimal75(*precision, *scale), - Self::TimeStamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), + Self::TimeStampTZ(tu, tz, _) => ColumnType::TimestampTZ(*tu, *tz), } } @@ -66,7 +66,7 @@ impl LiteralValue { Self::Int128(i) => i.into(), Self::Decimal75(_, _, s) => *s, Self::Scalar(scalar) => *scalar, - Self::TimeStamp(_, _, time) => time.into(), + Self::TimeStampTZ(_, _, time) => time.into(), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index 210f89584..74e9d80b4 100644 --- 
a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -87,7 +87,7 @@ impl From> for ArrayRef { } OwnedColumn::Scalar(_) => unimplemented!("Cannot convert Scalar type to arrow type"), OwnedColumn::VarChar(col) => Arc::new(StringArray::from(col)), - OwnedColumn::Timestamp(time_unit, _, col) => match time_unit { + OwnedColumn::TimestampTZ(time_unit, _, col) => match time_unit { ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(col)), ProofsTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(col)), ProofsTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)), @@ -199,7 +199,7 @@ impl TryFrom<&ArrayRef> for OwnedColumn { ) })?; let timestamps = array.values().iter().copied().collect::>(); - Ok(OwnedColumn::Timestamp( + Ok(OwnedColumn::TimestampTZ( ProofsTimeUnit::Second, ProofsTimeZone::try_from(timezone.clone())?, timestamps, @@ -215,7 +215,7 @@ impl TryFrom<&ArrayRef> for OwnedColumn { ) })?; let timestamps = array.values().iter().copied().collect::>(); - Ok(OwnedColumn::Timestamp( + Ok(OwnedColumn::TimestampTZ( ProofsTimeUnit::Millisecond, ProofsTimeZone::try_from(timezone.clone())?, timestamps, @@ -231,7 +231,7 @@ impl TryFrom<&ArrayRef> for OwnedColumn { ) })?; let timestamps = array.values().iter().copied().collect::>(); - Ok(OwnedColumn::Timestamp( + Ok(OwnedColumn::TimestampTZ( ProofsTimeUnit::Microsecond, ProofsTimeZone::try_from(timezone.clone())?, timestamps, @@ -247,7 +247,7 @@ impl TryFrom<&ArrayRef> for OwnedColumn { ) })?; let timestamps = array.values().iter().copied().collect::>(); - Ok(OwnedColumn::Timestamp( + Ok(OwnedColumn::TimestampTZ( ProofsTimeUnit::Nanosecond, ProofsTimeZone::try_from(timezone.clone())?, timestamps, diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index e4f5dccb2..9240e4702 100644 --- 
a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -29,7 +29,7 @@ pub enum OwnedColumn { /// Scalar columns Scalar(Vec), /// Timestamp columns - Timestamp(ProofsTimeUnit, ProofsTimeZone, Vec), + TimestampTZ(ProofsTimeUnit, ProofsTimeZone, Vec), } impl OwnedColumn { @@ -44,7 +44,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.len(), OwnedColumn::Decimal75(_, _, col) => col.len(), OwnedColumn::Scalar(col) => col.len(), - OwnedColumn::Timestamp(_, _, col) => col.len(), + OwnedColumn::TimestampTZ(_, _, col) => col.len(), } } /// Returns true if the column is empty. @@ -58,7 +58,7 @@ impl OwnedColumn { OwnedColumn::Int128(col) => col.is_empty(), OwnedColumn::Scalar(col) => col.is_empty(), OwnedColumn::Decimal75(_, _, col) => col.is_empty(), - OwnedColumn::Timestamp(_, _, col) => col.is_empty(), + OwnedColumn::TimestampTZ(_, _, col) => col.is_empty(), } } /// Returns the type of the column. @@ -74,7 +74,7 @@ impl OwnedColumn { OwnedColumn::Decimal75(precision, scale, _) => { ColumnType::Decimal75(*precision, *scale) } - OwnedColumn::Timestamp(tu, tz, _) => ColumnType::Timestamp(*tu, *tz), + OwnedColumn::TimestampTZ(tu, tz, _) => ColumnType::TimestampTZ(*tu, *tz), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs index 4d669aabe..8b88519d2 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor.rs @@ -95,7 +95,7 @@ impl DataAccessor for OwnedTableTestA .alloc_slice_fill_iter(col.iter().map(|s| (*s).into())); Column::VarChar((col, scals)) } - OwnedColumn::Timestamp(tu, tz, col) => Column::Timestamp(*tu, *tz, col), + OwnedColumn::TimestampTZ(tu, tz, col) => Column::TimestampTZ(*tu, *tz, col), } } } diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs 
b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index 8ab2833b3..e0348bc84 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -219,7 +219,7 @@ pub fn decimal75( /// timestamp("event_time", ProofsTimeUnit::Second, ProofsTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), /// ]); /// ``` -pub fn timestamp( +pub fn timestamptz( name: impl Deref, time_unit: ProofsTimeUnit, timezone: ProofsTimeZone, @@ -227,6 +227,6 @@ pub fn timestamp( ) -> (Identifier, OwnedColumn) { ( name.parse().unwrap(), - OwnedColumn::Timestamp(time_unit, timezone, data.into_iter().collect()), + OwnedColumn::TimestampTZ(time_unit, timezone, data.into_iter().collect()), ) } diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index abd740adf..8538aca92 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -116,7 +116,7 @@ pub fn make_random_test_accessor_data( columns.push(Arc::new(StringArray::from(col))); } ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), - ColumnType::Timestamp(tu, tz) => { + ColumnType::TimestampTZ(tu, tz) => { column_fields.push(Field::new( *col_name, DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.to_string()))), diff --git a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs index 696d81d31..3915636b5 100644 --- a/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs +++ b/crates/proof-of-sql/src/base/polynomial/multilinear_extension.rs @@ -102,7 +102,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.inner_product(evaluation_vec), Column::Int128(c) => c.inner_product(evaluation_vec), Column::Decimal75(_, _, c) 
=> c.inner_product(evaluation_vec), - Column::Timestamp(_, _, c) => c.inner_product(evaluation_vec), + Column::TimestampTZ(_, _, c) => c.inner_product(evaluation_vec), } } @@ -116,7 +116,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.mul_add(res, multiplier), Column::Int128(c) => c.mul_add(res, multiplier), Column::Decimal75(_, _, c) => c.mul_add(res, multiplier), - Column::Timestamp(_, _, c) => c.mul_add(res, multiplier), + Column::TimestampTZ(_, _, c) => c.mul_add(res, multiplier), } } @@ -130,7 +130,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => c.to_sumcheck_term(num_vars), Column::Int128(c) => c.to_sumcheck_term(num_vars), Column::Decimal75(_, _, c) => c.to_sumcheck_term(num_vars), - Column::Timestamp(_, _, c) => c.to_sumcheck_term(num_vars), + Column::TimestampTZ(_, _, c) => c.to_sumcheck_term(num_vars), } } @@ -144,7 +144,7 @@ impl MultilinearExtension for Column<'_, S> { Column::VarChar((_, c)) => MultilinearExtension::::id(c), Column::Int128(c) => MultilinearExtension::::id(c), Column::Decimal75(_, _, c) => MultilinearExtension::::id(c), - Column::Timestamp(_, _, c) => MultilinearExtension::::id(c), + Column::TimestampTZ(_, _, c) => MultilinearExtension::::id(c), } } } diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs index 09802e9eb..4122610fc 100644 --- a/crates/proof-of-sql/src/base/time/timestamp.rs +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -9,7 +9,7 @@ use std::{str::FromStr, sync::Arc}; /// a [`TimeUnit`], which is a signed count of units either /// after or before the [Unix epoch](https://en.wikipedia.org/wiki/Unix_time). 
#[derive(Debug, Clone, Deserialize, Serialize, Hash)] -pub struct Timestamp { +pub struct TimestampTZ { time: i64, timeunit: ProofsTimeUnit, timezone: Tz, diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs index 88f3be836..59c0ca996 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_cpu.rs @@ -60,7 +60,7 @@ fn compute_dory_commitment( } CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), - CommittableColumn::Timestamp(_, _, column) => { + CommittableColumn::TimestampTZ(_, _, column) => { compute_dory_commitment_impl(column, offset, setup) } } diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs index c9a3f06d3..c3eb297eb 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dory_commitment_helper_gpu.rs @@ -280,7 +280,7 @@ fn compute_dory_commitment( CommittableColumn::Scalar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::VarChar(column) => compute_dory_commitment_impl(column, offset, setup), CommittableColumn::Boolean(column) => compute_dory_commitment_impl(column, offset, setup), - CommittableColumn::Timestamp(_, _, column) => { + CommittableColumn::TimestampTZ(_, _, column) => { compute_dory_commitment_impl(column, offset, setup) } } diff --git a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs index d5d81b75b..99804fa14 100644 --- a/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs +++ 
b/crates/proof-of-sql/src/sql/ast/dense_filter_util.rs @@ -65,7 +65,7 @@ pub fn filter_column_by_index<'a, S: Scalar>( *scale, alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), ), - Column::Timestamp(tu, tz, col) => Column::Timestamp( + Column::TimestampTZ(tu, tz, col) => Column::TimestampTZ( *tu, *tz, alloc.alloc_slice_fill_iter(indexes.iter().map(|&i| col[i])), diff --git a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs index 7c8b330a7..cbba90066 100644 --- a/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs +++ b/crates/proof-of-sql/src/sql/ast/filter_result_expr.rs @@ -75,7 +75,7 @@ impl FilterResultExpr { Column::Scalar(_col) => todo!(), Column::Decimal75(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), Column::VarChar((_, scals)) => prover_evaluate_impl(builder, alloc, selection, scals), - Column::Timestamp(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), + Column::TimestampTZ(_, _, col) => prover_evaluate_impl(builder, alloc, selection, col), }; } diff --git a/crates/proof-of-sql/src/sql/ast/group_by_util.rs b/crates/proof-of-sql/src/sql/ast/group_by_util.rs index 2c206fcd0..b874f128e 100644 --- a/crates/proof-of-sql/src/sql/ast/group_by_util.rs +++ b/crates/proof-of-sql/src/sql/ast/group_by_util.rs @@ -114,7 +114,7 @@ pub(super) fn sum_aggregate_column_by_index_counts<'a, S: Scalar>( } Column::Scalar(col) => sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes), Column::VarChar(_) => unimplemented!("Cannot sum varchar columns"), - Column::Timestamp(_, _, col) => { + Column::TimestampTZ(_, _, col) => { sum_aggregate_slice_by_index_counts(alloc, col, counts, indexes) } } @@ -178,7 +178,7 @@ pub(super) fn compare_indexes_by_columns( Column::Decimal75(_, _, _) => todo!("TODO: unimplemented"), Column::Scalar(col) => col[i].cmp(&col[j]), Column::VarChar((col, _)) => col[i].cmp(col[j]), - Column::Timestamp(_, _, col) => 
col[i].cmp(&col[j]), + Column::TimestampTZ(_, _, col) => col[i].cmp(&col[j]), }) .find(|&ord| ord != Ordering::Equal) .unwrap_or(Ordering::Equal) diff --git a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs index 455d3ed1f..ad2e1c678 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_query_result.rs @@ -115,7 +115,7 @@ impl ProvableQueryResult { ColumnType::Scalar => decode_and_convert::(&self.data[offset..]), ColumnType::VarChar => decode_and_convert::<&str, S>(&self.data[offset..]), - ColumnType::Timestamp(_, _) => { + ColumnType::TimestampTZ(_, _) => { decode_and_convert::(&self.data[offset..]) } }?; @@ -197,11 +197,11 @@ impl ProvableQueryResult { offset += num_read; Ok((field.name(), OwnedColumn::Decimal75(precision, scale, col))) } - ColumnType::Timestamp(tu, tz) => { + ColumnType::TimestampTZ(tu, tz) => { let (col, num_read) = decode_multiple_elements(&self.data[offset..], n) .ok_or(QueryError::Overflow)?; offset += num_read; - Ok((field.name(), OwnedColumn::Timestamp(tu, tz, col))) + Ok((field.name(), OwnedColumn::TimestampTZ(tu, tz, col))) } }) .collect::>()?, diff --git a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs index 98e66d4e7..c81f02c10 100644 --- a/crates/proof-of-sql/src/sql/proof/provable_result_column.rs +++ b/crates/proof-of-sql/src/sql/proof/provable_result_column.rs @@ -42,7 +42,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.num_bytes(selection), Column::Scalar(col) => col.num_bytes(selection), Column::VarChar((col, _)) => col.num_bytes(selection), - Column::Timestamp(_, _, col) => col.num_bytes(selection), + Column::TimestampTZ(_, _, col) => col.num_bytes(selection), } } @@ -56,7 +56,7 @@ impl ProvableResultColumn for Column<'_, S> { Column::Decimal75(_, _, col) => col.write(out, 
selection), Column::Scalar(col) => col.write(out, selection), Column::VarChar((col, _)) => col.write(out, selection), - Column::Timestamp(_, _, col) => col.write(out, selection), + Column::TimestampTZ(_, _, col) => col.write(out, selection), } } } diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs index 8883d7580..001cd5a52 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result.rs @@ -163,7 +163,7 @@ fn make_empty_query_result(result_fields: Vec) -> QueryR } ColumnType::Scalar => OwnedColumn::Scalar(vec![]), ColumnType::VarChar => OwnedColumn::VarChar(vec![]), - ColumnType::Timestamp(tu, tz) => OwnedColumn::Timestamp(tu, tz, vec![]), + ColumnType::TimestampTZ(tu, tz) => OwnedColumn::TimestampTZ(tu, tz, vec![]), }, ) }) From 5e0f468cf93a1729bc38d654e67f330abf4872b5 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Mon, 17 Jun 2024 23:39:12 -0700 Subject: [PATCH 08/24] fix: doctest --- crates/proof-of-sql/src/base/database/owned_table_utility.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index e0348bc84..707cd412b 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -216,7 +216,7 @@ pub fn decimal75( /// use chrono_tz::Europe::London; /// /// let result = owned_table::([ -/// timestamp("event_time", ProofsTimeUnit::Second, ProofsTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), +/// timestamptz("event_time", ProofsTimeUnit::Second, ProofsTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), /// ]); /// ``` pub fn timestamptz( From 5989ae68c7dc5e3cd02d6312be16322642815219 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Tue, 18 Jun 2024 
08:12:58 -0700 Subject: [PATCH 09/24] fix: breaking change and update to main --- .../proof-of-sql/src/base/database/column.rs | 22 +++++-------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index c99981d86..e789c2974 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -9,7 +9,7 @@ use bumpalo::Bump; use proof_of_sql_parser::Identifier; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; -use std::{str::FromStr, sync::Arc}; +use std::sync::Arc; /// Represents a read-only view of a column in an in-memory, /// column-oriented database. @@ -300,22 +300,10 @@ impl TryFrom for ColumnType { DataType::Decimal256(precision, scale) if precision <= 75 => { Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) } - DataType::Timestamp(time_unit, timezone_option) => { - let custom_time_unit = ProofsTimeUnit::from(time_unit); - - let timezone = match timezone_option { - Some(tz_arc) => { - let tz_str = &*tz_arc; // Dereference Arc to &str - chrono_tz::Tz::from_str(tz_str) - .map_err(|_| format!("Invalid timezone string: {}", tz_str))? 
- } - None => chrono_tz::Tz::UTC, // Default to UTC if None - }; - Ok(ColumnType::TimestampTZ( - custom_time_unit, - ProofsTimeZone::from(timezone), - )) - } + DataType::Timestamp(time_unit, timezone_option) => Ok(ColumnType::TimestampTZ( + ProofsTimeUnit::from(time_unit), + ProofsTimeZone::try_from(timezone_option)?, + )), DataType::Utf8 => Ok(ColumnType::VarChar), _ => Err(format!("Unsupported arrow data type {:?}", data_type)), } From a2a04a1e1e8d6c7e8a93ee30fa475f84a5c6ab2e Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Tue, 18 Jun 2024 08:15:56 -0700 Subject: [PATCH 10/24] fix: fmt --- crates/proof-of-sql/src/base/database/column.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index e789c2974..3790f8597 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -228,7 +228,7 @@ impl ColumnType { | ColumnType::BigInt | ColumnType::Int128 | ColumnType::Scalar - | ColumnType::Decimal75(_, _) // TODO: is a timestamp numeric? + | ColumnType::Decimal75(_, _) ) } @@ -236,7 +236,7 @@ impl ColumnType { pub fn is_integer(&self) -> bool { matches!( self, - ColumnType::SmallInt | ColumnType::Int | ColumnType::BigInt | ColumnType::Int128 // TODO: is a timestamp an integer? 
+ ColumnType::SmallInt | ColumnType::Int | ColumnType::BigInt | ColumnType::Int128 ) } From 0e045bc712b7b318b897854ea936c7eff2b5870c Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Tue, 18 Jun 2024 16:25:53 -0700 Subject: [PATCH 11/24] feat: add unit tests --- .../src/base/commitment/column_bounds.rs | 46 ++++- .../commitment/column_commitment_metadata.rs | 149 ++++++++++++++- .../src/base/commitment/committable_column.rs | 116 +++++++++++- .../arrow_array_to_column_conversion.rs | 179 ++++++++++++++++-- .../proof-of-sql/src/base/database/column.rs | 14 +- .../src/base/database/literal_value.rs | 4 +- .../database/owned_and_arrow_conversions.rs | 26 +-- .../src/base/database/owned_column.rs | 46 +---- .../src/base/database/owned_table_test.rs | 39 ++++ .../owned_table_test_accessor_test.rs | 17 ++ .../src/base/database/owned_table_utility.rs | 10 +- .../base/database/test_accessor_utility.rs | 17 +- .../proof-of-sql/src/base/time/timestamp.rs | 121 ++++++------ 13 files changed, 630 insertions(+), 154 deletions(-) diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 8b44e8d9f..22106c930 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -277,7 +277,9 @@ impl ColumnBounds { (ColumnBounds::Int128(bounds_a), ColumnBounds::Int128(bounds_b)) => { Ok(ColumnBounds::Int128(bounds_a.difference(bounds_b))) } - + (ColumnBounds::TimestampTZ(bounds_a), ColumnBounds::TimestampTZ(bounds_b)) => { + Ok(ColumnBounds::TimestampTZ(bounds_a.difference(bounds_b))) + } (_, _) => Err(ColumnBoundsMismatch(Box::new(self), Box::new(other))), } } @@ -286,7 +288,12 @@ impl ColumnBounds { #[cfg(test)] mod tests { use super::*; - use crate::base::{database::OwnedColumn, math::decimal::Precision, scalar::Curve25519Scalar}; + use crate::base::{ + database::OwnedColumn, + math::decimal::Precision, + scalar::Curve25519Scalar, + 
time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, + }; use itertools::Itertools; #[test] @@ -526,8 +533,19 @@ mod tests { ); let committable_decimal75_column = CommittableColumn::from(&decimal75_column); let decimal75_column_bounds = ColumnBounds::from_column(&committable_decimal75_column); - assert_eq!(decimal75_column_bounds, ColumnBounds::NoOrder); + + let timestamp_column = OwnedColumn::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + vec![1_i64, 2, 3, 4], + ); + let committable_timestamp_column = CommittableColumn::from(&timestamp_column); + let timestamp_column_bounds = ColumnBounds::from_column(&committable_timestamp_column); + assert_eq!( + timestamp_column_bounds, + ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 1, max: 4 })) + ); } #[test] @@ -569,6 +587,14 @@ mod tests { int128_a.try_union(int128_b).unwrap(), ColumnBounds::Int128(Bounds::Bounded(BoundsInner { min: 1, max: 6 })) ); + + let timestamp_a = ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 1, max: 3 })); + let timestamp_b = + ColumnBounds::TimestampTZ(Bounds::Bounded(BoundsInner { min: 4, max: 6 })); + assert_eq!( + timestamp_a.try_union(timestamp_b).unwrap(), + ColumnBounds::TimestampTZ(Bounds::Bounded(BoundsInner { min: 1, max: 6 })) + ); } #[test] @@ -578,6 +604,7 @@ mod tests { let int = ColumnBounds::Int(Bounds::Sharp(BoundsInner { min: -10, max: 10 })); let bigint = ColumnBounds::BigInt(Bounds::Sharp(BoundsInner { min: 1, max: 3 })); let int128 = ColumnBounds::Int128(Bounds::Sharp(BoundsInner { min: 4, max: 6 })); + let timestamp = ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 4, max: 6 })); let bounds = [ (no_order, "NoOrder"), @@ -585,6 +612,7 @@ mod tests { (int, "Int"), (bigint, "BigInt"), (int128, "Int128"), + (timestamp, "Timestamp"), ]; for ((bound_a, name_a), (bound_b, name_b)) in bounds.iter().tuple_combinations() { @@ -626,6 +654,13 @@ mod tests { int128_a.try_difference(int128_b).unwrap(),
ColumnBounds::Int128(Bounds::Bounded(BoundsInner { min: 1, max: 4 })) ); + + let timestamp_a = ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 1, max: 4 })); + let timestamp_b = ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 3, max: 6 })); + assert_eq!( + timestamp_a.try_difference(timestamp_b).unwrap(), + ColumnBounds::TimestampTZ(Bounds::Bounded(BoundsInner { min: 1, max: 4 })) + ); } #[test] @@ -633,6 +668,8 @@ mod tests { let no_order = ColumnBounds::NoOrder; let bigint = ColumnBounds::BigInt(Bounds::Sharp(BoundsInner { min: 1, max: 3 })); let int128 = ColumnBounds::Int128(Bounds::Sharp(BoundsInner { min: 4, max: 6 })); + let timestamp = ColumnBounds::TimestampTZ(Bounds::Sharp(BoundsInner { min: 4, max: 6 })); + let smallint = ColumnBounds::SmallInt(Bounds::Sharp(BoundsInner { min: 1, max: 3 })); assert!(no_order.try_difference(bigint).is_err()); assert!(bigint.try_difference(no_order).is_err()); @@ -642,5 +679,8 @@ mod tests { assert!(bigint.try_difference(int128).is_err()); assert!(int128.try_difference(bigint).is_err()); + + assert!(smallint.try_difference(timestamp).is_err()); + assert!(timestamp.try_difference(smallint).is_err()); } } diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index a9473ed53..9d3fce8cd 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++ b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -165,8 +165,11 @@ impl ColumnCommitmentMetadata { mod tests { use super::*; use crate::base::{ - commitment::column_bounds::Bounds, database::OwnedColumn, math::decimal::Precision, + commitment::column_bounds::Bounds, + database::OwnedColumn, + math::decimal::Precision, scalar::Curve25519Scalar, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; #[test] @@ -224,6 +227,18 @@ mod tests { } ); + assert_eq!( + ColumnCommitmentMetadata::try_new( + 
ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ColumnBounds::TimestampTZ(Bounds::Empty), + ) + .unwrap(), + ColumnCommitmentMetadata { + column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + bounds: ColumnBounds::TimestampTZ(Bounds::Empty), + } + ); + assert_eq!( ColumnCommitmentMetadata::try_new( ColumnType::Int128, @@ -354,6 +369,26 @@ mod tests { ); assert_eq!(decimal_metadata.bounds(), &ColumnBounds::NoOrder); + let timestamp_column: OwnedColumn = + OwnedColumn::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [1i64, 2, 3, 4, 5].to_vec(), + ); + let committable_timestamp_column = CommittableColumn::from(&timestamp_column); + let timestamp_metadata = + ColumnCommitmentMetadata::from_column(&committable_timestamp_column); + assert_eq!( + timestamp_metadata.column_type(), + &ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ); + if let ColumnBounds::TimestampTZ(Bounds::Sharp(bounds)) = timestamp_metadata.bounds() { + assert_eq!(bounds.min(), &1); + assert_eq!(bounds.max(), &5); + } else { + panic!("Bounds constructed from nonempty TimestampTZ column should be ColumnBounds::BigInt(Bounds::Sharp(_))"); + } + let varchar_column = OwnedColumn::::VarChar( ["Lorem", "ipsum", "dolor", "sit", "amet"] .map(String::from) @@ -489,6 +524,80 @@ mod tests { bigint_metadata_a.try_union(bigint_metadata_b).unwrap(), bigint_metadata_c ); + + // Ordered case for TimestampTZ + // Example Unix epoch times + let times = [ + 1_625_072_400, + 1_625_076_000, + 1_625_079_600, + 1_625_072_400, + 1_625_065_000, + ]; + let timezone = PoSQLTimeZone::UTC; + let timeunit = PoSQLTimeUnit::Second; + let timestamp_column_a = CommittableColumn::TimestampTZ(timeunit, timezone, &times[..2]); + let timestamp_metadata_a = ColumnCommitmentMetadata::from_column(&timestamp_column_a); + let timestamp_column_b = CommittableColumn::TimestampTZ(timeunit, timezone, &times[2..]); + let timestamp_metadata_b =
ColumnCommitmentMetadata::from_column(&timestamp_column_b); + let timestamp_column_c = CommittableColumn::TimestampTZ(timeunit, timezone, &times); + let timestamp_metadata_c = ColumnCommitmentMetadata::from_column(&timestamp_column_c); + assert_eq!( + timestamp_metadata_a + .try_union(timestamp_metadata_b) + .unwrap(), + timestamp_metadata_c + ); + } + + #[test] + fn we_can_difference_timestamp_tz_matching_metadata() { + // Ordered case + let times = [ + 1_625_072_400, + 1_625_076_000, + 1_625_079_600, + 1_625_072_400, + 1_625_065_000, + ]; + let timezone = PoSQLTimeZone::UTC; + let timeunit = PoSQLTimeUnit::Second; + + let timestamp_column_a = CommittableColumn::TimestampTZ(timeunit, timezone, &times[..2]); + let timestamp_metadata_a = ColumnCommitmentMetadata::from_column(&timestamp_column_a); + let timestamp_column_b = CommittableColumn::TimestampTZ(timeunit, timezone, &times); + let timestamp_metadata_b = ColumnCommitmentMetadata::from_column(&timestamp_column_b); + + let b_difference_a = timestamp_metadata_b + .try_difference(timestamp_metadata_a) + .unwrap(); + assert_eq!( + b_difference_a.column_type, + ColumnType::TimestampTZ(timeunit, timezone) + ); + if let ColumnBounds::TimestampTZ(Bounds::Bounded(bounds)) = b_difference_a.bounds { + assert_eq!(bounds.min(), &1_625_065_000); + assert_eq!(bounds.max(), &1_625_079_600); + } else { + panic!("difference of overlapping bounds should be Bounded"); + } + + let timestamp_column_empty = CommittableColumn::TimestampTZ(timeunit, timezone, &[]); + let timestamp_metadata_empty = + ColumnCommitmentMetadata::from_column(&timestamp_column_empty); + + assert_eq!( + timestamp_metadata_b + .try_difference(timestamp_metadata_empty) + .unwrap(), + timestamp_metadata_b + ); + assert_eq!( + timestamp_metadata_empty + .try_difference(timestamp_metadata_b) + .unwrap(), + timestamp_metadata_empty + ); } #[test] @@ -746,5 +855,43 @@ mod tests { assert!(different_decimal75_metadata .try_union(decimal75_metadata) .is_err()); + + let timestamp_tz_metadata_a =
ColumnCommitmentMetadata { + column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + bounds: ColumnBounds::TimestampTZ(Bounds::Empty), + }; + + let timestamp_tz_metadata_b = ColumnCommitmentMetadata { + column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Millisecond, PoSQLTimeZone::UTC), + bounds: ColumnBounds::TimestampTZ(Bounds::Empty), + }; + + // Tests for union operations + assert!(timestamp_tz_metadata_a.try_union(varchar_metadata).is_err()); + assert!(varchar_metadata.try_union(timestamp_tz_metadata_a).is_err()); + + // Tests for difference operations + assert!(timestamp_tz_metadata_a + .try_difference(scalar_metadata) + .is_err()); + assert!(scalar_metadata + .try_difference(timestamp_tz_metadata_a) + .is_err()); + + // Tests for different time units within the same type + assert!(timestamp_tz_metadata_a + .try_union(timestamp_tz_metadata_b) + .is_err()); + assert!(timestamp_tz_metadata_b + .try_union(timestamp_tz_metadata_a) + .is_err()); + + // Difference with different time units + assert!(timestamp_tz_metadata_a + .try_difference(timestamp_tz_metadata_b) + .is_err()); + assert!(timestamp_tz_metadata_b + .try_difference(timestamp_tz_metadata_a) + .is_err()); } } diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index 3ce212868..546f303f5 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -3,7 +3,7 @@ use crate::base::{ math::decimal::Precision, ref_into::RefInto, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; #[cfg(feature = "blitzar")] use blitzar::sequence::Sequence; @@ -39,7 +39,7 @@ pub enum CommittableColumn<'a> { /// Column of limbs for committing to scalars, hashed from a VarChar column. 
VarChar(Vec<[u64; 4]>), /// Borrowed Timestamp column with Timezone, mapped to `i64`. - TimestampTZ(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), + TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, &'a [i64]), } impl<'a> CommittableColumn<'a> { @@ -134,7 +134,9 @@ impl<'a, S: Scalar> From<&'a OwnedColumn> for CommittableColumn<'a> { .map(Into::<[u64; 4]>::into) .collect(), ), - OwnedColumn::TimestampTZ(_, _, times) => (times as &[_]).into(), + OwnedColumn::TimestampTZ(tu, tz, times) => { + CommittableColumn::TimestampTZ(*tu, *tz, times as &[_]) + } } } } @@ -150,7 +152,6 @@ impl<'a> From<&'a [i32]> for CommittableColumn<'a> { } } -// TODO: make sure this does not conflict with TimeStamp impl<'a> From<&'a [i64]> for CommittableColumn<'a> { fn from(value: &'a [i64]) -> Self { CommittableColumn::BigInt(value) @@ -219,6 +220,31 @@ mod tests { assert_eq!(res_committable_column, test_committable_column) } + #[test] + fn we_can_get_type_and_length_of_timestamp_column() { + // empty case + let smallint_committable_column = + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + assert_eq!(smallint_committable_column.len(), 0); + assert!(smallint_committable_column.is_empty()); + assert_eq!( + smallint_committable_column.column_type(), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ); + + let smallint_committable_column = CommittableColumn::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + &[12, 34, 56], + ); + assert_eq!(smallint_committable_column.len(), 3); + assert!(!smallint_committable_column.is_empty()); + assert_eq!( + smallint_committable_column.column_type(), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ); + } + #[test] fn we_can_get_type_and_length_of_smallint_column() { // empty case @@ -358,6 +384,34 @@ mod tests { assert_eq!(bool_committable_column.column_type(), ColumnType::Boolean); } + #[test] + fn we_can_convert_from_borrowing_timestamp_column() { + // empty case + let 
from_borrowed_column = + CommittableColumn::from(&Column::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + &[], + )); + assert_eq!( + from_borrowed_column, + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + ); + + // non-empty case + let timestamps = [1625072400, 1625076000, 1625083200]; + let from_borrowed_column = + CommittableColumn::from(&Column::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + ×tamps, + )); + assert_eq!( + from_borrowed_column, + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps) + ); + } + #[test] fn we_can_convert_from_borrowing_bigint_column() { // empty case @@ -512,6 +566,34 @@ mod tests { ); } + #[test] + fn we_can_convert_from_owned_timestamp_column() { + // empty case + let owned_column = OwnedColumn::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + Vec::new(), + ); + let from_owned_column = CommittableColumn::from(&owned_column); + assert_eq!( + from_owned_column, + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + ); + + // non-empty case + let timestamps = vec![1625072400, 1625076000, 1625083200]; + let owned_column = OwnedColumn::::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + timestamps.clone(), + ); + let from_owned_column = CommittableColumn::from(&owned_column); + assert_eq!( + from_owned_column, + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps) + ); + } + #[test] fn we_can_convert_from_owned_int_column() { // empty case @@ -790,4 +872,30 @@ mod tests { ); assert_eq!(commitment_buffer[0], commitment_buffer[1]); } + + #[test] + fn we_can_commit_to_timestamp_column_through_committable_column() { + // Empty case + let committable_column = + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + let sequence = Sequence::from(&committable_column); + let mut commitment_buffer = [CompressedRistretto::default()]; + 
compute_curve25519_commitments(&mut commitment_buffer, &[sequence], 0); + assert_eq!(commitment_buffer[0], CompressedRistretto::default()); + + // Non-empty case + let timestamps = [1625072400, 1625076000, 1625083200]; + let committable_column = + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps); + + let sequence_actual = Sequence::from(&committable_column); + let sequence_expected = Sequence::from(timestamps.as_slice()); + let mut commitment_buffer = [CompressedRistretto::default(); 2]; + compute_curve25519_commitments( + &mut commitment_buffer, + &[sequence_actual, sequence_expected], + 0, + ); + assert_eq!(commitment_buffer[0], commitment_buffer[1]); + } } diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index c24b91918..91bb4b6c1 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -4,7 +4,7 @@ use crate::{ database::Column, math::decimal::Precision, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }, sql::parse::ConversionError, }; @@ -279,9 +279,9 @@ impl ArrayRefExt for ArrayRef { ArrowTimeUnit::Second => { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( - ProofsTimeUnit::Second, - ProofsTimeZone::try_from(tz.clone())?, - array.values(), + PoSQLTimeUnit::Second, + PoSQLTimeZone::try_from(tz.clone())?, + &array.values()[range.start..range.end], )) } else { Err(ArrowArrayToColumnConversionError::UnsupportedType( @@ -292,9 +292,9 @@ impl ArrayRefExt for ArrayRef { ArrowTimeUnit::Millisecond => { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( - ProofsTimeUnit::Millisecond, - ProofsTimeZone::try_from(tz.clone())?, - array.values(), + PoSQLTimeUnit::Millisecond, + 
PoSQLTimeZone::try_from(tz.clone())?, + &array.values()[range.start..range.end], )) } else { Err(ArrowArrayToColumnConversionError::UnsupportedType( @@ -305,9 +305,9 @@ impl ArrayRefExt for ArrayRef { ArrowTimeUnit::Microsecond => { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( - ProofsTimeUnit::Microsecond, - ProofsTimeZone::try_from(tz.clone())?, - array.values(), + PoSQLTimeUnit::Microsecond, + PoSQLTimeZone::try_from(tz.clone())?, + &array.values()[range.start..range.end], )) } else { Err(ArrowArrayToColumnConversionError::UnsupportedType( @@ -318,9 +318,9 @@ impl ArrayRefExt for ArrayRef { ArrowTimeUnit::Nanosecond => { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( - ProofsTimeUnit::Nanosecond, - ProofsTimeZone::try_from(tz.clone())?, - array.values(), + PoSQLTimeUnit::Nanosecond, + PoSQLTimeZone::try_from(tz.clone())?, + &array.values()[range.start..range.end], )) } else { Err(ArrowArrayToColumnConversionError::UnsupportedType( @@ -365,6 +365,88 @@ mod tests { use arrow::array::Decimal256Builder; use std::{str::FromStr, sync::Arc}; + #[test] + fn we_can_convert_timestamp_array_normal_range() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.clone().into(), + Some("UTC"), + )); + + let result = array.to_column::(&alloc, &(1..3), None); + assert_eq!( + result.unwrap(), + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + ); + } + + #[test] + fn we_can_build_an_empty_column_from_an_empty_range_timestamp() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.into(), + Some("UTC"), + )); + + let result = array + .to_column::(&alloc, &(2..2), None) + .unwrap(); + assert_eq!( + result, + 
Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + ); + } + + #[test] + fn we_can_convert_timestamp_array_empty_range() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.into(), + Some("UTC"), + )); + + let result = array.to_column::(&alloc, &(1..1), None); + assert_eq!( + result.unwrap(), + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + ); + } + + #[test] + fn we_cannot_convert_timestamp_array_oob_range() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000, 1625083200]; + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.into(), + Some("UTC"), + )); + + let result = array.to_column::(&alloc, &(3..5), None); + assert_eq!( + result, + Err(ArrowArrayToColumnConversionError::IndexOutOfBounds(3, 5)) + ); + } + + #[test] + fn we_can_convert_timestamp_array_with_nulls() { + let alloc = Bump::new(); + let data = vec![Some(1625072400), None, Some(1625083200)]; + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.into(), + Some("UTC"), + )); + + let result = array.to_column::(&alloc, &(0..3), None); + assert!(matches!( + result, + Err(ArrowArrayToColumnConversionError::ArrayContainsNulls) + )); + } + #[test] fn we_cannot_convert_utf8_array_oob_range() { let alloc = Bump::new(); @@ -908,6 +990,24 @@ mod tests { ); } + #[test] + fn we_can_convert_valid_timestamp_array_refs_into_valid_columns() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.clone().into(), + Some("UTC"), + )); + + let result = array + .to_column::(&alloc, &(0..2), None) + .unwrap(); + assert_eq!( + result, + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[..]) + ); + } + #[test] fn 
we_can_convert_valid_boolean_array_refs_into_valid_columns_using_ranges_smaller_than_arrays() { @@ -951,6 +1051,25 @@ mod tests { ); } + #[test] + fn we_can_convert_valid_timestamp_array_refs_into_valid_columns_using_ranges_smaller_than_arrays( + ) { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.clone().into(), + Some("UTC"), + )); + + // Test using a range smaller than the array size + assert_eq!( + array + .to_column::(&alloc, &(1..3), None) + .unwrap(), + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + ); + } + #[test] fn we_can_convert_valid_string_array_refs_into_valid_columns_using_ranges_smaller_than_arrays() { @@ -992,6 +1111,23 @@ mod tests { assert_eq!(result, Column::VarChar((&[], &[]))); } + #[test] + fn we_can_convert_valid_timestamp_array_refs_into_valid_columns_using_ranges_with_zero_size() { + let alloc = Bump::new(); + let data = vec![1625072400, 1625076000]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.clone().into(), + Some("UTC"), + )); + let result = array + .to_column::(&alloc, &(0..0), None) + .unwrap(); + assert_eq!( + result, + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + ); + } + #[test] fn we_can_convert_valid_boolean_array_refs_into_valid_vec_scalars() { let data = vec![false, true]; @@ -1005,6 +1141,23 @@ mod tests { ); } + #[test] + fn we_can_convert_valid_timestamp_array_refs_into_valid_vec_scalars() { + let data = vec![1625072400, 1625076000]; // Example Unix timestamps + let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( + data.clone().into(), + Some("UTC"), + )); + + assert_eq!( + array.to_curve25519_scalars(), + Ok(data + .iter() + .map(|&v| Curve25519Scalar::from(v)) + .collect::>()) + ); + } + #[test] fn 
we_can_convert_valid_integer_array_refs_into_valid_vec_scalars() { let data = vec![1, -3]; diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index 3790f8597..5bb0fa19d 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -2,7 +2,7 @@ use super::{LiteralValue, TableRef}; use crate::base::{ math::decimal::{scale_scalar, Precision}, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; use bumpalo::Bump; @@ -43,7 +43,7 @@ pub enum Column<'a, S: Scalar> { /// - the first element maps to the stored [`TimeUnit`] /// - the second element maps to a timezone /// - the third element maps to columns of timeunits since unix epoch - TimestampTZ(ProofsTimeUnit, ProofsTimeZone, &'a [i64]), + TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, &'a [i64]), } impl<'a, S: Scalar> Column<'a, S> { @@ -215,7 +215,7 @@ pub enum ColumnType { Decimal75(Precision, i8), /// Mapped to i64 #[serde(alias = "TIMESTAMP", alias = "timestamp")] - TimestampTZ(ProofsTimeUnit, ProofsTimeZone), + TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone), } impl ColumnType { @@ -301,8 +301,8 @@ impl TryFrom for ColumnType { Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) } DataType::Timestamp(time_unit, timezone_option) => Ok(ColumnType::TimestampTZ( - ProofsTimeUnit::from(time_unit), - ProofsTimeZone::try_from(timezone_option)?, + PoSQLTimeUnit::from(time_unit), + PoSQLTimeZone::try_from(timezone_option)?, )), DataType::Utf8 => Ok(ColumnType::VarChar), _ => Err(format!("Unsupported arrow data type {:?}", data_type)), @@ -416,6 +416,10 @@ mod tests { #[test] fn column_type_serializes_to_string() { + let column_type = ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC); + let serialized = serde_json::to_string(&column_type).unwrap(); + 
assert_eq!(serialized, r#"{"TimestampTZ":["Second","UTC"]}"#); + let column_type = ColumnType::Boolean; let serialized = serde_json::to_string(&column_type).unwrap(); assert_eq!(serialized, r#""Boolean""#); diff --git a/crates/proof-of-sql/src/base/database/literal_value.rs b/crates/proof-of-sql/src/base/database/literal_value.rs index 4b99c3c25..76bc41865 100644 --- a/crates/proof-of-sql/src/base/database/literal_value.rs +++ b/crates/proof-of-sql/src/base/database/literal_value.rs @@ -2,7 +2,7 @@ use crate::base::{ database::ColumnType, math::decimal::Precision, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; use serde::{Deserialize, Serialize}; @@ -36,7 +36,7 @@ pub enum LiteralValue { Scalar(S), /// TimeStamp defined over a unit (s, ms, ns, etc) and timezone with backing store /// mapped to i64, which is time units since unix epoch - TimeStampTZ(ProofsTimeUnit, ProofsTimeZone, i64), + TimeStampTZ(PoSQLTimeUnit, PoSQLTimeZone, i64), } impl LiteralValue { diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index 74e9d80b4..3e551b89e 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -20,7 +20,7 @@ use crate::base::{ }, math::decimal::Precision, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; use arrow::{ array::{ @@ -88,10 +88,10 @@ impl From> for ArrayRef { OwnedColumn::Scalar(_) => unimplemented!("Cannot convert Scalar type to arrow type"), OwnedColumn::VarChar(col) => Arc::new(StringArray::from(col)), OwnedColumn::TimestampTZ(time_unit, _, col) => match time_unit { - ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(col)), - ProofsTimeUnit::Millisecond => 
Arc::new(TimestampMillisecondArray::from(col)), - ProofsTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)), - ProofsTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(col)), + PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(col)), + PoSQLTimeUnit::Millisecond => Arc::new(TimestampMillisecondArray::from(col)), + PoSQLTimeUnit::Microsecond => Arc::new(TimestampMicrosecondArray::from(col)), + PoSQLTimeUnit::Nanosecond => Arc::new(TimestampNanosecondArray::from(col)), }, } } @@ -200,8 +200,8 @@ impl TryFrom<&ArrayRef> for OwnedColumn { })?; let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( - ProofsTimeUnit::Second, - ProofsTimeZone::try_from(timezone.clone())?, + PoSQLTimeUnit::Second, + PoSQLTimeZone::try_from(timezone.clone())?, timestamps, )) } @@ -216,8 +216,8 @@ impl TryFrom<&ArrayRef> for OwnedColumn { })?; let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( - ProofsTimeUnit::Millisecond, - ProofsTimeZone::try_from(timezone.clone())?, + PoSQLTimeUnit::Millisecond, + PoSQLTimeZone::try_from(timezone.clone())?, timestamps, )) } @@ -232,8 +232,8 @@ impl TryFrom<&ArrayRef> for OwnedColumn { })?; let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( - ProofsTimeUnit::Microsecond, - ProofsTimeZone::try_from(timezone.clone())?, + PoSQLTimeUnit::Microsecond, + PoSQLTimeZone::try_from(timezone.clone())?, timestamps, )) } @@ -248,8 +248,8 @@ impl TryFrom<&ArrayRef> for OwnedColumn { })?; let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( - ProofsTimeUnit::Nanosecond, - ProofsTimeZone::try_from(timezone.clone())?, + PoSQLTimeUnit::Nanosecond, + PoSQLTimeZone::try_from(timezone.clone())?, timestamps, )) } diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index 9240e4702..f6b9fb6a0 100644 --- 
a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -6,7 +6,7 @@ use super::ColumnType; use crate::base::{ math::decimal::Precision, scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; #[derive(Debug, PartialEq, Clone, Eq)] #[non_exhaustive] @@ -29,7 +29,7 @@ pub enum OwnedColumn { /// Scalar columns Scalar(Vec), /// Timestamp columns - TimestampTZ(ProofsTimeUnit, ProofsTimeZone, Vec), + TimestampTZ(PoSQLTimeUnit, PoSQLTimeZone, Vec), } impl OwnedColumn { @@ -78,45 +78,3 @@ impl OwnedColumn { } } } - -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::Boolean(Vec::from_iter(iter)) - } -} -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::SmallInt(Vec::from_iter(iter)) - } -} -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::Int(Vec::from_iter(iter)) - } -} -// TODO: does this conflict with TimeStamp? 
-impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::BigInt(Vec::from_iter(iter)) - } -} -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::Int128(Vec::from_iter(iter)) - } -} -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::VarChar(Vec::from_iter(iter)) - } -} -impl FromIterator for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::Scalar(Vec::from_iter(iter)) - } -} -impl<'a, S: Scalar> FromIterator<&'a str> for OwnedColumn { - fn from_iter>(iter: T) -> Self { - Self::from_iter(iter.into_iter().map(|s| s.to_string())) - } -} diff --git a/crates/proof-of-sql/src/base/database/owned_table_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test.rs index 3a9e3c297..617fda933 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test.rs @@ -2,6 +2,7 @@ use crate::{ base::{ database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError}, scalar::Curve25519Scalar, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }, proof_primitive::dory::DoryScalar, }; @@ -56,8 +57,22 @@ fn we_can_create_an_owned_table_with_data() { "boolean", [true, false, true, false, true, false, true, false, true], ), + timestamptz( + "timestamp", + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX], + ), ]); let mut table = IndexMap::new(); + table.insert( + Identifier::try_new("timestamp").unwrap(), + OwnedColumn::TimestampTZ( + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(), + ), + ); table.insert( Identifier::try_new("bigint").unwrap(), OwnedColumn::BigInt(vec![0_i64, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX]), @@ -109,12 +124,24 @@ fn we_get_inequality_between_tables_with_differing_column_order() { int128("b", [0; 0]), varchar("c", ["0"; 0]), boolean("d", [false; 0]), + timestamptz( + "timestamp", + PoSQLTimeUnit::Second, + 
PoSQLTimeZone::UTC, + [0; 0], + ), ]); let owned_table_b: OwnedTable = owned_table([ boolean("d", [false; 0]), int128("b", [0; 0]), bigint("a", [0; 0]), varchar("c", ["0"; 0]), + timestamptz( + "timestamp", + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [0; 0], + ), ]); assert_ne!(owned_table_a, owned_table_b); } @@ -125,12 +152,24 @@ fn we_get_inequality_between_tables_with_differing_data() { int128("b", [0]), varchar("c", ["0"]), boolean("d", [true]), + timestamptz( + "timestamp", + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [1625072400], + ), ]); let owned_table_b: OwnedTable = owned_table([ bigint("a", [1]), int128("b", [0]), varchar("c", ["0"]), boolean("d", [true]), + timestamptz( + "timestamp", + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [1625076000], + ), ]); assert_ne!(owned_table_a, owned_table_b); } diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs index e3c34d906..0c70a1ade 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs @@ -5,6 +5,7 @@ use super::{ use crate::base::{ database::owned_table_utility::*, scalar::{compute_commitment_for_testing, Curve25519Scalar}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; use blitzar::proof::InnerProductProof; @@ -48,6 +49,12 @@ fn we_can_access_the_columns_of_a_table() { varchar("varchar", ["a", "bc", "d", "e"]), scalar("scalar", [1, 2, 3, 4]), boolean("boolean", [true, false, true, false]), + timestamptz( + "time", + PoSQLTimeUnit::Second, + PoSQLTimeZone::UTC, + [4, 5, 6, 5], + ), ]); accessor.add_table(table_ref_2, data2, 0_usize); @@ -99,6 +106,16 @@ fn we_can_access_the_columns_of_a_table() { Column::Boolean(col) => assert_eq!(col.to_vec(), vec![true, false, true, false]), _ => panic!("Invalid column type"), }; + + let column = ColumnRef::new( + table_ref_2, + 
"time".parse().unwrap(), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ); + match accessor.get_column(column) { + Column::TimestampTZ(_, _, col) => assert_eq!(col.to_vec(), vec![4, 5, 6, 5]), + _ => panic!("Invalid column type"), + }; } #[test] diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index 707cd412b..0b2131b30 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -16,7 +16,7 @@ use super::{OwnedColumn, OwnedTable}; use crate::base::{ scalar::Scalar, - time::timestamp::{ProofsTimeUnit, ProofsTimeZone}, + time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}, }; use core::ops::Deref; use proof_of_sql_parser::Identifier; @@ -212,17 +212,17 @@ pub fn decimal75( /// ``` /// use proof_of_sql::base::{database::owned_table_utility::*, /// scalar::Curve25519Scalar, -/// time::timestamp::{ProofsTimeUnit, ProofsTimeZone}}; +/// time::timestamp::{PoSQLTimeUnit, PoSQLTimeZone}}; /// use chrono_tz::Europe::London; /// /// let result = owned_table::([ -/// timestamptz("event_time", ProofsTimeUnit::Second, ProofsTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), +/// timestamptz("event_time", PoSQLTimeUnit::Second, PoSQLTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), /// ]); /// ``` pub fn timestamptz( name: impl Deref, - time_unit: ProofsTimeUnit, - timezone: ProofsTimeZone, + time_unit: PoSQLTimeUnit, + timezone: PoSQLTimeZone, data: impl IntoIterator, ) -> (Identifier, OwnedColumn) { ( diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index 8538aca92..0aba06a22 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -1,11 +1,11 @@ -use 
crate::base::{database::ColumnType, time::timestamp::ProofsTimeUnit}; +use crate::base::{database::ColumnType, time::timestamp::PoSQLTimeUnit}; use arrow::{ array::{ Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }, - datatypes::{i256, DataType, Field, Schema, TimeUnit}, + datatypes::{i256, DataType, Field, Schema}, record_batch::RecordBatch, }; use rand::{ @@ -119,19 +119,22 @@ pub fn make_random_test_accessor_data( ColumnType::TimestampTZ(tu, tz) => { column_fields.push(Field::new( *col_name, - DataType::Timestamp(TimeUnit::from(*tu), Some(Arc::from(tz.to_string()))), + DataType::Timestamp( + (*tu).into(), + Some(Arc::from(tz.to_string())), + ), false, )); // Create the correct timestamp array based on the time unit let timestamp_array: Arc = match tu { - ProofsTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.to_vec())), - ProofsTimeUnit::Millisecond => { + PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.to_vec())), + PoSQLTimeUnit::Millisecond => { Arc::new(TimestampMillisecondArray::from(values.to_vec())) } - ProofsTimeUnit::Microsecond => { + PoSQLTimeUnit::Microsecond => { Arc::new(TimestampMicrosecondArray::from(values.to_vec())) } - ProofsTimeUnit::Nanosecond => { + PoSQLTimeUnit::Nanosecond => { Arc::new(TimestampNanosecondArray::from(values.to_vec())) } }; diff --git a/crates/proof-of-sql/src/base/time/timestamp.rs b/crates/proof-of-sql/src/base/time/timestamp.rs index 4122610fc..2905c55e8 100644 --- a/crates/proof-of-sql/src/base/time/timestamp.rs +++ b/crates/proof-of-sql/src/base/time/timestamp.rs @@ -5,64 +5,71 @@ use core::fmt; use serde::{Deserialize, Serialize}; use std::{str::FromStr, sync::Arc}; -/// A postgresql-like `TimeStamp` type. 
It is defined over -/// a [`TimeUnit`], which is a signed count of units either -/// after or before the [Unix epoch](https://en.wikipedia.org/wiki/Unix_time). -#[derive(Debug, Clone, Deserialize, Serialize, Hash)] -pub struct TimestampTZ { - time: i64, - timeunit: ProofsTimeUnit, - timezone: Tz, -} - /// A typed TimeZone for a [`TimeStamp`]. It is optionally /// used to define a timezone other than UTC for a new TimeStamp. /// It exists as a wrapper around chrono-tz because chrono-tz does /// not implement uniform bit distribution #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Serialize, Deserialize)] -pub struct ProofsTimeZone(Tz); +pub struct PoSQLTimeZone(Tz); + +impl PoSQLTimeZone { + /// Convenience constant for the UTC timezone + pub const UTC: PoSQLTimeZone = PoSQLTimeZone(Tz::UTC); +} -impl ProofsTimeZone { +impl PoSQLTimeZone { /// Create a new ProofsTimeZone from a chrono TimeZone pub fn new(tz: Tz) -> Self { - ProofsTimeZone(tz) + PoSQLTimeZone(tz) } } -impl From<&ProofsTimeZone> for Arc { - fn from(timezone: &ProofsTimeZone) -> Self { +impl From<&PoSQLTimeZone> for Arc { + fn from(timezone: &PoSQLTimeZone) -> Self { Arc::from(timezone.0.name()) } } -impl From for ProofsTimeZone { +impl From for PoSQLTimeZone { fn from(tz: Tz) -> Self { - ProofsTimeZone(tz) + PoSQLTimeZone(tz) } } -impl fmt::Display for ProofsTimeZone { +impl fmt::Display for PoSQLTimeZone { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.0) } } -impl TryFrom>> for ProofsTimeZone { - type Error = &'static str; // Explicitly state the error type +impl TryFrom>> for PoSQLTimeZone { + type Error = &'static str; fn try_from(value: Option>) -> Result { match value { Some(arc_str) => Tz::from_str(&arc_str) - .map(ProofsTimeZone) + .map(PoSQLTimeZone) .map_err(|_| "Invalid timezone string"), - None => Ok(ProofsTimeZone(Tz::UTC)), // Default to UTC + None => Ok(PoSQLTimeZone(Tz::UTC)), // Default to UTC } } } -/// Specifies different units of time measurement 
relative to the Unix epoch. +impl TryFrom<&str> for PoSQLTimeZone { + type Error = &'static str; + + fn try_from(value: &str) -> Result { + Tz::from_str(value) + .map(PoSQLTimeZone) + .map_err(|_| "Invalid timezone string") + } +} + +/// Specifies different units of time measurement relative to the Unix epoch. It is essentially +/// a wrapper over [arrow::datatypes::TimeUnit] so that we can derive Copy and implement custom traits +/// such as bit distribution and Hash. #[derive(Debug, Clone, Copy, Eq, PartialEq, Deserialize, Serialize, Hash)] -pub enum ProofsTimeUnit { +pub enum PoSQLTimeUnit { /// Represents a time unit of one second. Second, /// Represents a time unit of one millisecond (1/1,000 of a second). @@ -73,35 +80,35 @@ pub enum ProofsTimeUnit { Nanosecond, } -impl From for ArrowTimeUnit { - fn from(unit: ProofsTimeUnit) -> Self { +impl From for ArrowTimeUnit { + fn from(unit: PoSQLTimeUnit) -> Self { match unit { - ProofsTimeUnit::Second => ArrowTimeUnit::Second, - ProofsTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, - ProofsTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, - ProofsTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, } } } -impl fmt::Display for ProofsTimeUnit { +impl fmt::Display for PoSQLTimeUnit { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - ProofsTimeUnit::Second => write!(f, "Second"), - ProofsTimeUnit::Millisecond => write!(f, "Millisecond"), - ProofsTimeUnit::Microsecond => write!(f, "Microsecond"), - ProofsTimeUnit::Nanosecond => write!(f, "Nanosecond"), + PoSQLTimeUnit::Second => write!(f, "Second"), + PoSQLTimeUnit::Millisecond => write!(f, "Millisecond"), + PoSQLTimeUnit::Microsecond => write!(f, "Microsecond"), + PoSQLTimeUnit::Nanosecond => write!(f, "Nanosecond"), 
} } } -impl From for ProofsTimeUnit { +impl From for PoSQLTimeUnit { fn from(unit: ArrowTimeUnit) -> Self { match unit { - ArrowTimeUnit::Second => ProofsTimeUnit::Second, - ArrowTimeUnit::Millisecond => ProofsTimeUnit::Millisecond, - ArrowTimeUnit::Microsecond => ProofsTimeUnit::Microsecond, - ArrowTimeUnit::Nanosecond => ProofsTimeUnit::Nanosecond, + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, } } } @@ -131,7 +138,7 @@ mod tests { let arc_tz = Arc::new(tz_str.to_string()); // Convert Arc to Arc by dereferencing to &str then creating a new Arc let arc_tz_str: Arc = Arc::from(&**arc_tz); - let timezone = ProofsTimeZone::try_from(Some(arc_tz_str)); + let timezone = PoSQLTimeZone::try_from(Some(arc_tz_str)); assert!(timezone.is_ok(), "Timezone should be valid: {}", tz_str); assert_eq!( timezone.unwrap().0, @@ -147,7 +154,7 @@ mod tests { let edge_timezones = ["Etc/GMT+12", "Etc/GMT-14", "America/Argentina/Ushuaia"]; for tz_str in &edge_timezones { let arc_tz = Arc::from(*tz_str); - let result = ProofsTimeZone::try_from(Some(arc_tz)); + let result = PoSQLTimeZone::try_from(Some(arc_tz)); assert!(result.is_ok(), "Edge timezone should be valid: {}", tz_str); assert_eq!( result.unwrap().0, @@ -161,14 +168,14 @@ mod tests { #[test] fn test_empty_timezone_string() { let empty_tz = Arc::from(""); - let result = ProofsTimeZone::try_from(Some(empty_tz)); + let result = PoSQLTimeZone::try_from(Some(empty_tz)); assert!(result.is_err(), "Empty timezone string should fail"); } #[test] fn test_unicode_timezone_strings() { let unicode_tz = Arc::from("Europe/Paris\u{00A0}"); // Non-breaking space character - let result = ProofsTimeZone::try_from(Some(unicode_tz)); + let result = PoSQLTimeZone::try_from(Some(unicode_tz)); assert!( result.is_err(), "Unicode characters should not be valid in timezone 
strings" @@ -177,7 +184,7 @@ mod tests { #[test] fn test_null_option() { - let result = ProofsTimeZone::try_from(None); + let result = PoSQLTimeZone::try_from(None); assert!(result.is_ok(), "None should convert without error"); assert_eq!(result.unwrap().0, Tz::UTC, "None should default to UTC"); } @@ -185,39 +192,39 @@ mod tests { #[test] fn we_can_convert_from_arrow_time_units() { assert_eq!( - ProofsTimeUnit::from(ArrowTimeUnit::Second), - ProofsTimeUnit::Second + PoSQLTimeUnit::from(ArrowTimeUnit::Second), + PoSQLTimeUnit::Second ); assert_eq!( - ProofsTimeUnit::from(ArrowTimeUnit::Millisecond), - ProofsTimeUnit::Millisecond + PoSQLTimeUnit::from(ArrowTimeUnit::Millisecond), + PoSQLTimeUnit::Millisecond ); assert_eq!( - ProofsTimeUnit::from(ArrowTimeUnit::Microsecond), - ProofsTimeUnit::Microsecond + PoSQLTimeUnit::from(ArrowTimeUnit::Microsecond), + PoSQLTimeUnit::Microsecond ); assert_eq!( - ProofsTimeUnit::from(ArrowTimeUnit::Nanosecond), - ProofsTimeUnit::Nanosecond + PoSQLTimeUnit::from(ArrowTimeUnit::Nanosecond), + PoSQLTimeUnit::Nanosecond ); } #[test] fn we_can_convert_to_arrow_time_units() { assert_eq!( - ArrowTimeUnit::from(ProofsTimeUnit::Second), + ArrowTimeUnit::from(PoSQLTimeUnit::Second), ArrowTimeUnit::Second ); assert_eq!( - ArrowTimeUnit::from(ProofsTimeUnit::Millisecond), + ArrowTimeUnit::from(PoSQLTimeUnit::Millisecond), ArrowTimeUnit::Millisecond ); assert_eq!( - ArrowTimeUnit::from(ProofsTimeUnit::Microsecond), + ArrowTimeUnit::from(PoSQLTimeUnit::Microsecond), ArrowTimeUnit::Microsecond ); assert_eq!( - ArrowTimeUnit::from(ProofsTimeUnit::Nanosecond), + ArrowTimeUnit::from(PoSQLTimeUnit::Nanosecond), ArrowTimeUnit::Nanosecond ); } From b8ed589229c41aeb9d36b3767004ff2d999a4546 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Tue, 18 Jun 2024 16:31:08 -0700 Subject: [PATCH 12/24] fix: fmt --- .../proof-of-sql/src/base/database/test_accessor_utility.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs index 0aba06a22..f14397c4d 100644 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -119,10 +119,7 @@ pub fn make_random_test_accessor_data( ColumnType::TimestampTZ(tu, tz) => { column_fields.push(Field::new( *col_name, - DataType::Timestamp( - (*tu).into(), - Some(Arc::from(tz.to_string())), - ), + DataType::Timestamp((*tu).into(), Some(Arc::from(tz.to_string()))), false, )); // Create the correct timestamp array based on the time unit From d21e1e8a94412effe822e3ad275081dc167b4b53 Mon Sep 17 00:00:00 2001 From: Dustin Ray Date: Tue, 18 Jun 2024 18:05:51 -0700 Subject: [PATCH 13/24] feat: add tests --- .../record_batch_dataframe_conversion.rs | 74 ++++++++++++++++++- .../src/base/database/record_batch_utility.rs | 47 ++++++++++++ .../proof-of-sql/src/base/time/timestamp.rs | 10 +++ 3 files changed, 130 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/src/base/database/record_batch_dataframe_conversion.rs b/crates/proof-of-sql/src/base/database/record_batch_dataframe_conversion.rs index 88ba6d76b..4c49a148d 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_dataframe_conversion.rs +++ b/crates/proof-of-sql/src/base/database/record_batch_dataframe_conversion.rs @@ -1,8 +1,9 @@ use arrow::{ array::{ Array, BooleanArray, Decimal128Array, Int16Array, Int32Array, Int64Array, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, }, - datatypes::{DataType, Field, Schema}, + datatypes::{DataType, Field, Schema, TimeUnit as ArrowTimeUnit}, record_batch::RecordBatch, }; use polars::{ @@ -60,6 +61,7 @@ pub fn record_batch_to_dataframe(record_batch: RecordBatch) -> Option Series::new(f.name(), data) } + arrow::datatypes::DataType::Utf8 => { let data = col .as_any() @@ -81,6 +83,42 @@ pub fn 
record_batch_to_dataframe(record_batch: RecordBatch) -> Option // Note: we make this unchecked because if record batch has values that overflow 38 digits, so should the data frame. .into_series() } + arrow::datatypes::DataType::Timestamp(time_unit, _timezone_option) => { + match time_unit { + arrow::datatypes::TimeUnit::Second => { + let data = col + .as_any() + .downcast_ref::() + .map(|array| array.values()) + .unwrap(); + Series::new(f.name(), data) + } + arrow::datatypes::TimeUnit::Millisecond => { + let data = col + .as_any() + .downcast_ref::() + .map(|array| array.values()) + .unwrap(); + Series::new(f.name(), data) + } + arrow::datatypes::TimeUnit::Microsecond => { + let data = col + .as_any() + .downcast_ref::() + .map(|array| array.values()) + .unwrap(); + Series::new(f.name(), data) + } + arrow::datatypes::TimeUnit::Nanosecond => { + let data = col + .as_any() + .downcast_ref::() + .map(|array| array.values()) + .unwrap(); + Series::new(f.name(), data) + } + } + } _ => None?, }) }) @@ -170,6 +208,40 @@ pub fn dataframe_to_record_batch(data: DataFrame) -> Option { DataType::Decimal128(38, 0) } + // NOTE: Polars does not support seconds + polars::datatypes::DataType::Datetime(timeunit, timezone) => { + let col = series.i64().unwrap().cont_slice().unwrap(); + let timezone_arc = timezone.as_ref().map(|tz| Arc::from(tz.as_str())); + let arrow_array: Arc = match timeunit { + polars::datatypes::TimeUnit::Nanoseconds => { + Arc::new(TimestampNanosecondArray::with_timezone_opt( + col.to_vec().into(), + timezone_arc, + )) + } + polars::datatypes::TimeUnit::Microseconds => { + Arc::new(TimestampMicrosecondArray::with_timezone_opt( + col.to_vec().into(), + timezone_arc, + )) + } + polars::datatypes::TimeUnit::Milliseconds => { + Arc::new(TimestampMillisecondArray::with_timezone_opt( + col.to_vec().into(), + timezone_arc, + )) + } + }; + columns.push(arrow_array); + DataType::Timestamp( + match timeunit { + polars::datatypes::TimeUnit::Nanoseconds => 
ArrowTimeUnit::Nanosecond, + polars::datatypes::TimeUnit::Microseconds => ArrowTimeUnit::Microsecond, + polars::datatypes::TimeUnit::Milliseconds => ArrowTimeUnit::Millisecond, + }, + None, + ) + } _ => return None, }; diff --git a/crates/proof-of-sql/src/base/database/record_batch_utility.rs b/crates/proof-of-sql/src/base/database/record_batch_utility.rs index aa461a64f..7c67c8f7c 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/database/record_batch_utility.rs @@ -1,3 +1,8 @@ +use crate::base::time::timestamp::{PoSQLTimeUnit, Time}; +use arrow::array::{ + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, +}; use std::sync::Arc; /// Extension trait for Vec to convert it to an Arrow array @@ -18,6 +23,48 @@ impl ToArrow for Vec { } } +impl ToArrow for Vec