diff --git a/Cargo.toml b/Cargo.toml index 1913112a2..b48b0e77f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ bytemuck = {version = "1.14.2" } byte-slice-cast = { version = "1.2.1" } clap = { version = "4.5.4" } criterion = { version = "0.5.1" } -chrono-tz = {version = "0.9.0", features = ["serde"]} +chrono = { version = "0.4.38" } curve25519-dalek = { version = "4", features = ["rand_core"] } derive_more = { version = "0.99" } dyn_partial_eq = { version = "0.1.2" } diff --git a/crates/proof-of-sql-parser/Cargo.toml b/crates/proof-of-sql-parser/Cargo.toml index 3ffbff9e3..745c2fa02 100644 --- a/crates/proof-of-sql-parser/Cargo.toml +++ b/crates/proof-of-sql-parser/Cargo.toml @@ -15,8 +15,10 @@ doctest = true test = true [dependencies] +arrow = { workspace = true } arrayvec = { workspace = true, features = ["serde"] } bigdecimal = { workspace = true } +chrono = { workspace = true, features = ["serde"] } lalrpop-util = { workspace = true, features = ["lexer", "unicode"] } serde = { workspace = true, features = ["serde_derive"] } thiserror = { workspace = true } diff --git a/crates/proof-of-sql-parser/src/error.rs b/crates/proof-of-sql-parser/src/error.rs index 3d032f864..fbe90148c 100644 --- a/crates/proof-of-sql-parser/src/error.rs +++ b/crates/proof-of-sql-parser/src/error.rs @@ -1,3 +1,4 @@ +use serde::{Deserialize, Serialize}; use thiserror::Error; /// Errors encountered during the parsing process @@ -14,4 +15,44 @@ pub enum ParseError { ResourceIdParseError(String), } +/// General parsing error that may occur, for example if the provided schema/object_name strings +/// aren't valid postgres-style identifiers (excluding dollar signs). pub type ParseResult = std::result::Result; + +/// Errors related to time operations, including timezone and timestamp conversions. +#[derive(Error, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum PoSQLTimestampError { + /// Error when the timezone string provided cannot be parsed into a valid timezone. 
+ #[error("invalid timezone string: {0}")] + InvalidTimezone(String), + + /// Error indicating an invalid timezone offset was provided. + #[error("invalid timezone offset")] + InvalidTimezoneOffset, + + /// Indicates a failure to convert between different representations of time units. + #[error("Invalid time unit: {0}")] + InvalidTimeUnit(String), + + /// The local time does not exist because there is a gap in the local time. + /// This variant may also be returned if there was an error while resolving the local time, + /// caused by for example missing time zone data files, an error in an OS API, or overflow. + #[error("Local time does not exist because there is a gap in the local time")] + LocalTimeDoesNotExist, + + /// The local time is ambiguous because there is a fold in the local time. + /// This variant contains the two possible results, in the order (earliest, latest). + #[error("Unix timestamp is ambiguous because there is a fold in the local time: {0}")] + Ambiguous(String), + + /// Represents a catch-all for parsing errors not specifically covered by other variants. + #[error("Timestamp parsing error: {0}")] + ParsingError(String), +} + +// This exists because TryFrom for ColumnType error is String +impl From<PoSQLTimestampError> for String { + fn from(error: PoSQLTimestampError) -> Self { + error.to_string() + } +} diff --git a/crates/proof-of-sql-parser/src/identifier.rs b/crates/proof-of-sql-parser/src/identifier.rs index 1d85590ef..d8ee5ddae 100644 --- a/crates/proof-of-sql-parser/src/identifier.rs +++ b/crates/proof-of-sql-parser/src/identifier.rs @@ -44,7 +44,8 @@ impl FromStr for Identifier { fn from_str(string: &str) -> ParseResult { let name = IdentifierParser::new() .parse(string) - .map_err(|e| ParseError::IdentifierParseError(format!("{:?}", e)))?; + .map_err(|e| ParseError::IdentifierParseError( + format!("failed to parse identifier, (you may have used a reserved keyword as an ID, i.e. 
'timestamp') {:?}", e)))?; Ok(Identifier::new(name)) } @@ -152,6 +153,41 @@ mod tests { assert!(Identifier::from_str("GOOD_IDENTIFIER.").is_err()); assert!(Identifier::from_str(".GOOD_IDENTIFIER").is_err()); assert!(Identifier::from_str(&"LONG_IDENTIFIER_OVER_64_CHARACTERS".repeat(12)).is_err()); + + // Test for reserved keywords + let keywords = [ + "all", + "asc", + "desc", + "as", + "and", + "from", + "not", + "or", + "select", + "where", + "order", + "by", + "limit", + "offset", + "group", + "min", + "max", + "count", + "sum", + "true", + "false", + "timestamp", + "to_timestamp", + ]; + + for keyword in keywords.iter() { + assert!( + Identifier::from_str(keyword).is_err(), + "Should not parse keyword as identifier: {}", + keyword + ); + } } #[test] diff --git a/crates/proof-of-sql-parser/src/intermediate_ast.rs b/crates/proof-of-sql-parser/src/intermediate_ast.rs index ddda7e1c4..776b5955c 100644 --- a/crates/proof-of-sql-parser/src/intermediate_ast.rs +++ b/crates/proof-of-sql-parser/src/intermediate_ast.rs @@ -4,7 +4,9 @@ * https://docs.rs/vervolg/latest/vervolg/ast/enum.Statement.html ***/ -use crate::{intermediate_decimal::IntermediateDecimal, Identifier}; +use crate::{ + intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp, Identifier, +}; use serde::{Deserialize, Serialize}; /// Representation of a SetExpression, a collection of rows, each having one or more columns. 
@@ -328,6 +330,8 @@ pub enum Literal { VarChar(String), /// Decimal Literal Decimal(IntermediateDecimal), + /// Timestamp Literal + Timestamp(PoSQLTimestamp), } impl From for Literal { @@ -379,6 +383,12 @@ impl From for Literal { } } +impl From for Literal { + fn from(time: PoSQLTimestamp) -> Self { + Literal::Timestamp(time) + } +} + /// Helper function to append an item to a vector pub(crate) fn append(list: Vec, item: T) -> Vec { let mut result = list; diff --git a/crates/proof-of-sql-parser/src/lib.rs b/crates/proof-of-sql-parser/src/lib.rs index 1bd38217a..c53a282a5 100644 --- a/crates/proof-of-sql-parser/src/lib.rs +++ b/crates/proof-of-sql-parser/src/lib.rs @@ -2,6 +2,8 @@ /// Module for handling an intermediate decimal type received from the lexer. pub mod intermediate_decimal; +/// Module for handling an intermediate timestamp type received from the lexer. +pub mod posql_time; #[macro_use] extern crate lalrpop_util; @@ -16,7 +18,8 @@ pub(crate) mod test_utility; pub(crate) mod select_statement; pub use select_statement::SelectStatement; -pub(crate) mod error; +/// Error definitions for proof-of-sql-parser +pub mod error; pub use error::ParseError; pub(crate) use error::ParseResult; diff --git a/crates/proof-of-sql-parser/src/posql_time/mod.rs b/crates/proof-of-sql-parser/src/posql_time/mod.rs new file mode 100644 index 000000000..44731fc58 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/mod.rs @@ -0,0 +1,6 @@ +/// Defines an RFC3339-formatted timestamp +pub mod timestamp; +/// Defines a timezone as count of seconds offset from UTC +pub mod timezone; +/// Defines the precision of the timestamp +pub mod unit; diff --git a/crates/proof-of-sql-parser/src/posql_time/timestamp.rs b/crates/proof-of-sql-parser/src/posql_time/timestamp.rs new file mode 100644 index 000000000..336dd5350 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/timestamp.rs @@ -0,0 +1,251 @@ +use super::{timezone, unit::PoSQLTimeUnit}; +use 
crate::error::PoSQLTimestampError; +use chrono::{offset::LocalResult, DateTime, TimeZone, Utc}; +use serde::{Deserialize, Serialize}; + +/// Represents a fully parsed timestamp with detailed time unit and timezone information +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PoSQLTimestamp { + /// The datetime representation in UTC. + pub timestamp: DateTime, + + /// The precision of the datetime value, e.g., seconds, milliseconds. + pub timeunit: PoSQLTimeUnit, + + /// The timezone of the datetime, either UTC or a fixed offset from UTC. + pub timezone: timezone::PoSQLTimeZone, +} + +impl PoSQLTimestamp { + /// Attempts to parse a timestamp string into an [PoSQLTimestamp] structure. + /// This function supports two primary formats: + /// + /// 1. **RFC 3339 Parsing**: + /// - Parses the timestamp along with its timezone. + /// - If parsing succeeds, it extracts the timezone offset using `dt.offset().local_minus_utc()` + /// and then uses this to construct the appropriate `PoSQLTimeZone`. + /// + /// 2. **Timezone Parsing and Conversion**: + /// - The `from_offset` method is used to determine whether the timezone should be represented + /// as `Utc` or `FixedOffset`. This function simplifies the decision based on the offset value. 
+ /// + /// # Examples + /// ``` + /// use chrono::{DateTime, Utc}; + /// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone}; + /// + /// // Parsing an RFC 3339 timestamp without a timezone: + /// let timestamp_str = "2009-01-03T18:15:05Z"; + /// let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc); + /// + /// // Parsing an RFC 3339 timestamp with a positive timezone offset: + /// let timestamp_str_with_tz = "2009-01-03T18:15:05+03:00"; + /// let intermediate_timestamp = PoSQLTimestamp::try_from(timestamp_str_with_tz).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::FixedOffset(10800)); // 3 hours in seconds + /// ``` + pub fn try_from(timestamp_str: &str) -> Result { + let dt = DateTime::parse_from_rfc3339(timestamp_str) + .map_err(|e| PoSQLTimestampError::ParsingError(e.to_string()))?; + + let offset_seconds = dt.offset().local_minus_utc(); + let timezone = timezone::PoSQLTimeZone::from_offset(offset_seconds); + let nanoseconds = dt.timestamp_subsec_nanos(); + let timeunit = if nanoseconds % 1_000 != 0 { + PoSQLTimeUnit::Nanosecond + } else if nanoseconds % 1_000_000 != 0 { + PoSQLTimeUnit::Microsecond + } else if nanoseconds % 1_000_000_000 != 0 { + PoSQLTimeUnit::Millisecond + } else { + PoSQLTimeUnit::Second + }; + + Ok(PoSQLTimestamp { + timestamp: dt.with_timezone(&Utc), + timeunit, + timezone, + }) + } + + /// Attempts to parse a timestamp string into an `PoSQLTimestamp` structure. + /// This function supports two primary formats: + /// + /// **Unix Epoch Time Parsing**: + /// - Since Unix epoch timestamps don't inherently carry timezone information, + /// any Unix time parsed directly from an integer is assumed to be in UTC. 
+ /// + /// # Examples + /// ``` + /// use chrono::{DateTime, Utc}; + /// use proof_of_sql_parser::posql_time::{timestamp::PoSQLTimestamp, timezone::PoSQLTimeZone}; + /// + /// // Parsing a Unix epoch timestamp (assumed to be seconds and UTC): + /// let unix_time = 1231006505; + /// let intermediate_timestamp = PoSQLTimestamp::to_timestamp(unix_time).unwrap(); + /// assert_eq!(intermediate_timestamp.timezone, PoSQLTimeZone::Utc); + /// ``` + pub fn to_timestamp(epoch: i64) -> Result { + match Utc.timestamp_opt(epoch, 0) { + LocalResult::Single(timestamp) => Ok(PoSQLTimestamp { + timestamp, + timeunit: PoSQLTimeUnit::Second, + timezone: timezone::PoSQLTimeZone::Utc, + }), + LocalResult::Ambiguous(earliest, latest) => Err(PoSQLTimestampError::Ambiguous( + format!("The local time is ambiguous because there is a fold in the local time: earliest: {} latest: {} ", earliest, latest), + )), + LocalResult::None => Err(PoSQLTimestampError::LocalTimeDoesNotExist), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_unix_epoch_time_timezone() { + let unix_time = 1231006505; // Unix time as string + let expected_timezone = timezone::PoSQLTimeZone::Utc; // Unix time should always be UTC + let result = PoSQLTimestamp::to_timestamp(unix_time).unwrap(); + assert_eq!(result.timezone, expected_timezone); + } + + #[test] + fn test_unix_epoch_timestamp_parsing() { + let unix_time = 1231006505; // Example Unix timestamp (seconds since epoch) + let expected_datetime = Utc.timestamp_opt(unix_time, 0).unwrap(); + let expected_unit = PoSQLTimeUnit::Second; // Assuming basic second precision for Unix timestamp + let input = unix_time; // Simulate input as string since Unix times are often transmitted as strings + let result = PoSQLTimestamp::to_timestamp(input).unwrap(); + + assert_eq!(result.timestamp, expected_datetime); + assert_eq!(result.timeunit, expected_unit); + } + + #[test] + fn test_basic_rfc3339_timestamp() { + let input = "2023-06-26T12:34:56Z"; + let 
expected = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_positive_offset() { + let input = "2023-06-26T08:00:00+04:30"; + let expected = Utc.with_ymd_and_hms(2023, 6, 26, 3, 30, 0).unwrap(); // Adjusted to UTC + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_negative_offset() { + let input = "2023-06-26T20:00:00-05:00"; + let expected = Utc.with_ymd_and_hms(2023, 6, 27, 1, 0, 0).unwrap(); // Adjusted to UTC + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_utc_designator() { + let input = "2023-06-26T12:34:56Z"; + let expected = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_invalid_rfc3339_timestamp() { + let input = "not-a-timestamp"; + assert_eq!( + PoSQLTimestamp::try_from(input), + Err(PoSQLTimestampError::ParsingError( + "input contains invalid characters".into() + )) + ); + } + + #[test] + fn test_timestamp_with_seconds() { + let input = "2023-06-26T12:34:56Z"; + let expected_time = Utc.with_ymd_and_hms(2023, 6, 26, 12, 34, 56).unwrap(); + let expected_unit = PoSQLTimeUnit::Second; + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timestamp, expected_time); + assert_eq!(result.timeunit, expected_unit); + } + + #[test] + fn test_general_parsing_error() { + // This test assumes that there's a catch-all parsing error case that isn't covered by the more specific errors. 
+ let malformed_input = "2009-01-03T::00Z"; // Intentionally malformed timestamp + let result = PoSQLTimestamp::try_from(malformed_input); + assert!(matches!(result, Err(PoSQLTimestampError::ParsingError(_)))); + } + + #[test] + fn test_basic_date_time_support() { + let inputs = ["2009-01-03T18:15:05Z", "2009-01-03T18:15:05+02:00"]; + for input in inputs { + assert!( + DateTime::parse_from_rfc3339(input).is_ok(), + "Should parse correctly: {}", + input + ); + } + } + + #[test] + fn test_leap_seconds() { + let input = "1998-12-31T23:59:60Z"; // fyi the 59:-->60<-- is the leap second + assert!(PoSQLTimestamp::try_from(input).is_ok()); + } + + #[test] + fn test_leap_seconds_ranges() { + // Timestamp just before the leap second + let before_leap_second = "1998-12-31T23:59:59Z"; + // Timestamp during the leap second + let leap_second = "1998-12-31T23:59:60Z"; + // Timestamp just after the leap second + let after_leap_second = "1999-01-01T00:00:00Z"; + + // Parse timestamps + let before_leap_dt = PoSQLTimestamp::try_from(before_leap_second).unwrap(); + let leap_second_dt = PoSQLTimestamp::try_from(leap_second).unwrap(); + dbg!(&leap_second_dt.timestamp.timestamp()); + let after_leap_dt = PoSQLTimestamp::try_from(after_leap_second).unwrap(); + + // Ensure that "23:59:60Z" - 1 second is considered equivalent to "23:59:59Z" + assert_eq!( + before_leap_dt.timestamp, + leap_second_dt.timestamp - chrono::Duration::seconds(1) + ); + + // Ensure that "23:59:60Z" + 1 second is "1999-01-01T00:00:00Z" + assert_eq!( + after_leap_dt.timestamp, + leap_second_dt.timestamp + chrono::Duration::seconds(1) + ); + } + + #[test] + fn test_rejecting_incorrect_formats() { + let incorrect_formats = [ + "2009-January-03", + "25:61:61", + "20090103", + "181505", + "18:15:05", + ]; + for input in incorrect_formats { + assert!( + DateTime::parse_from_rfc3339(input).is_err(), + "Should reject incorrect format: {}", + input + ); + } + } +} diff --git 
a/crates/proof-of-sql-parser/src/posql_time/timezone.rs b/crates/proof-of-sql-parser/src/posql_time/timezone.rs
new file mode 100644
index 000000000..7f9a21b17
--- /dev/null
+++ b/crates/proof-of-sql-parser/src/posql_time/timezone.rs
@@ -0,0 +1,175 @@
+use crate::error::PoSQLTimestampError;
+use core::fmt;
+use serde::{Deserialize, Serialize};
+use std::sync::Arc;
+
+/// Captures a timezone from a timestamp query
+#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq)]
+pub enum PoSQLTimeZone {
+    /// Default variant for UTC timezone
+    Utc,
+    /// Timezone offset in seconds
+    FixedOffset(i32),
+}
+
+impl PoSQLTimeZone {
+    /// Parse a timezone from a count of seconds
+    pub fn from_offset(offset: i32) -> Self {
+        if offset == 0 {
+            PoSQLTimeZone::Utc
+        } else {
+            PoSQLTimeZone::FixedOffset(offset)
+        }
+    }
+}
+
+/// Parses two ASCII digit bytes into their numeric value; `None` if either is not a digit.
+fn two_digits(tens: u8, ones: u8) -> Option<i32> {
+    (tens.is_ascii_digit() && ones.is_ascii_digit())
+        .then(|| i32::from(tens - b'0') * 10 + i32::from(ones - b'0'))
+}
+
+impl TryFrom<&Option<Arc<str>>> for PoSQLTimeZone {
+    type Error = PoSQLTimestampError;
+
+    /// Parses an optional timezone string into a [`PoSQLTimeZone`].
+    ///
+    /// `None` and the zero-offset spellings (`Z`, `UTC`, `00:00`, `+00:00`, `0:00`, `+0:00`,
+    /// matched case-insensitively) map to [`PoSQLTimeZone::Utc`]. A signed `+HH:MM` / `-HH:MM`
+    /// string maps to its offset in seconds, normalized by [`PoSQLTimeZone::from_offset`] so
+    /// that `-00:00` is also reported as UTC.
+    ///
+    /// # Errors
+    /// - [`PoSQLTimestampError::InvalidTimezoneOffset`] if a signed six-character string is
+    ///   not of the form `HH:MM` (after the sign) with `HH <= 23` and `MM <= 59`.
+    /// - [`PoSQLTimestampError::InvalidTimezone`] for any other string.
+    fn try_from(value: &Option<Arc<str>>) -> Result<Self, Self::Error> {
+        let tz = match value {
+            Some(tz_str) => tz_str.to_uppercase(),
+            // A missing timezone is treated as UTC.
+            None => return Ok(PoSQLTimeZone::Utc),
+        };
+        match tz.as_str() {
+            "Z" | "UTC" | "00:00" | "+00:00" | "0:00" | "+0:00" => Ok(PoSQLTimeZone::Utc),
+            offset
+                if offset.chars().count() == 6
+                    && (offset.starts_with('+') || offset.starts_with('-')) =>
+            {
+                // Match on bytes instead of slicing the `str`: `offset[1..3]` panics when an
+                // index falls inside a multi-byte character, and `parse::<i32>` would accept
+                // a second sign such as "+-5:-3".
+                let (hours, minutes) = match offset.as_bytes() {
+                    &[_, h1, h2, b':', m1, m2] => two_digits(h1, h2).zip(two_digits(m1, m2)),
+                    _ => None,
+                }
+                .filter(|&(hours, minutes)| hours < 24 && minutes < 60)
+                .ok_or(PoSQLTimestampError::InvalidTimezoneOffset)?;
+                let sign = if offset.starts_with('-') { -1 } else { 1 };
+                Ok(PoSQLTimeZone::from_offset(sign * (hours * 3600 + minutes * 60)))
+            }
+            _ => Err(PoSQLTimestampError::InvalidTimezone(tz.to_string())),
+        }
+    }
+}
+
+impl fmt::Display for PoSQLTimeZone {
+    /// Formats UTC as `00:00` and a fixed offset as a signed `+HH:MM` / `-HH:MM` string.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match *self {
+            PoSQLTimeZone::Utc => write!(f, "00:00"),
+            PoSQLTimeZone::FixedOffset(seconds) => {
+                let sign = if seconds < 0 { '-' } else { '+' };
+                // `unsigned_abs` cannot overflow, unlike `abs` on `i32::MIN`.
+                let magnitude = seconds.unsigned_abs();
+                let (hours, minutes) = (magnitude / 3600, (magnitude % 3600) / 60);
+                write!(f, "{}{:02}:{:02}", sign, hours, minutes)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod timezone_parsing_tests {
+    use crate::posql_time::timezone;
+
+    #[test]
+    fn test_display_fixed_offset_positive() {
+        let timezone = timezone::PoSQLTimeZone::FixedOffset(4500); // +01:15
+        assert_eq!(format!("{}", timezone), "+01:15");
+    }
+
+    #[test]
+    fn test_display_fixed_offset_negative() {
+        let timezone = timezone::PoSQLTimeZone::FixedOffset(-3780); // -01:03
+        assert_eq!(format!("{}", timezone), "-01:03");
+    }
+
+    #[test]
+    fn test_display_utc() {
+        let timezone = timezone::PoSQLTimeZone::Utc;
+        assert_eq!(format!("{}", timezone), "00:00");
+    }
+}
+
+#[cfg(test)]
+mod timezone_try_from_tests {
+    use crate::{error::PoSQLTimestampError, posql_time::timezone::PoSQLTimeZone};
+    use std::sync::Arc;
+
+    fn parse(tz: &str) -> Result<PoSQLTimeZone, PoSQLTimestampError> {
+        let tz: Option<Arc<str>> = Some(Arc::from(tz));
+        PoSQLTimeZone::try_from(&tz)
+    }
+
+    #[test]
+    fn test_well_formed_offsets() {
+        assert_eq!(parse("+03:30"), Ok(PoSQLTimeZone::FixedOffset(12600)));
+        assert_eq!(parse("-04:00"), Ok(PoSQLTimeZone::FixedOffset(-14400)));
+        assert_eq!(parse("-00:00"), Ok(PoSQLTimeZone::Utc));
+        assert_eq!(parse("utc"), Ok(PoSQLTimeZone::Utc));
+    }
+
+    #[test]
+    fn test_malformed_offsets_are_rejected_without_panicking() {
+        for tz in ["+1é:00", "+-5:-3", "+05x30", "+24:00", "+00:60"] {
+            assert_eq!(parse(tz), Err(PoSQLTimestampError::InvalidTimezoneOffset));
+        }
+    }
+}
+
+#[cfg(test)]
+mod timezone_offset_tests {
+    use crate::posql_time::{timestamp::PoSQLTimestamp, timezone};
+
+    #[test]
+    fn test_utc_timezone() {
+        let input = "2023-06-26T12:34:56Z";
+        let expected_timezone = timezone::PoSQLTimeZone::Utc;
+        let result = PoSQLTimestamp::try_from(input).unwrap();
+        assert_eq!(result.timezone, expected_timezone);
+    }
+
+    #[test]
+    fn test_positive_offset_timezone() {
+        let input = "2023-06-26T12:34:56+03:30";
+        let expected_timezone = timezone::PoSQLTimeZone::from_offset(12600); // 3 hours and 30 minutes in seconds
+        let result = PoSQLTimestamp::try_from(input).unwrap();
+        assert_eq!(result.timezone, expected_timezone);
+    }
+
+    #[test]
+    fn test_negative_offset_timezone() {
+        let input = "2023-06-26T12:34:56-04:00";
+        let expected_timezone = timezone::PoSQLTimeZone::from_offset(-14400); // -4 hours in seconds
+        let result = PoSQLTimestamp::try_from(input).unwrap();
+        assert_eq!(result.timezone, expected_timezone);
+    }
+
+    #[test]
+    fn test_zero_offset_timezone() {
+        let input = "2023-06-26T12:34:56+00:00";
+        let expected_timezone = timezone::PoSQLTimeZone::Utc; // Zero offset defaults to UTC
+        let result = PoSQLTimestamp::try_from(input).unwrap();
+        assert_eq!(result.timezone, expected_timezone);
+    }
+}
diff 
--git a/crates/proof-of-sql-parser/src/posql_time/unit.rs b/crates/proof-of-sql-parser/src/posql_time/unit.rs new file mode 100644 index 000000000..c97919f90 --- /dev/null +++ b/crates/proof-of-sql-parser/src/posql_time/unit.rs @@ -0,0 +1,85 @@ +use arrow::datatypes::TimeUnit as ArrowTimeUnit; +use core::fmt; +use serde::{Deserialize, Serialize}; + +/// An intermediate type representing the time units from a parsed query +#[derive(Debug, Clone, Copy, Hash, Serialize, Deserialize, PartialEq, Eq)] +pub enum PoSQLTimeUnit { + /// Represents seconds with precision 0: ex "2024-06-20 12:34:56" + Second, + /// Represents milliseconds with precision 3: ex "2024-06-20 12:34:56.123" + Millisecond, + /// Represents microseconds with precision 6: ex "2024-06-20 12:34:56.123456" + Microsecond, + /// Represents nanoseconds with precision 9: ex "2024-06-20 12:34:56.123456789" + Nanosecond, +} + +impl From for ArrowTimeUnit { + fn from(unit: PoSQLTimeUnit) -> Self { + match unit { + PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + } + } +} + +impl From for PoSQLTimeUnit { + fn from(unit: ArrowTimeUnit) -> Self { + match unit { + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, + } + } +} + +impl fmt::Display for PoSQLTimeUnit { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PoSQLTimeUnit::Second => write!(f, "seconds (precision: 0)"), + PoSQLTimeUnit::Millisecond => write!(f, "milliseconds (precision: 3)"), + PoSQLTimeUnit::Microsecond => write!(f, "microseconds (precision: 6)"), + PoSQLTimeUnit::Nanosecond => write!(f, "nanoseconds (precision: 9)"), + } + } +} + +// allow(deprecated) for the sole 
purpose of testing that +// timestamp precision is parsed correctly. +#[cfg(test)] +#[allow(deprecated)] +mod time_unit_tests { + + use crate::posql_time::{timestamp::PoSQLTimestamp, unit::PoSQLTimeUnit}; + use chrono::{TimeZone, Utc}; + + #[test] + fn test_rfc3339_timestamp_with_milliseconds() { + let input = "2023-06-26T12:34:56.123Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_milli(12, 34, 56, 123); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Millisecond); + assert_eq!(result.timestamp, expected); + } + + #[test] + fn test_rfc3339_timestamp_with_microseconds() { + let input = "2023-06-26T12:34:56.123456Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_micro(12, 34, 56, 123456); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Microsecond); + assert_eq!(result.timestamp, expected); + } + #[test] + fn test_rfc3339_timestamp_with_nanoseconds() { + let input = "2023-06-26T12:34:56.123456789Z"; + let expected = Utc.ymd(2023, 6, 26).and_hms_nano(12, 34, 56, 123456789); + let result = PoSQLTimestamp::try_from(input).unwrap(); + assert_eq!(result.timeunit, PoSQLTimeUnit::Nanosecond); + assert_eq!(result.timestamp, expected); + } +} diff --git a/crates/proof-of-sql-parser/src/sql.lalrpop b/crates/proof-of-sql-parser/src/sql.lalrpop index dec34a067..ae86ca4b3 100644 --- a/crates/proof-of-sql-parser/src/sql.lalrpop +++ b/crates/proof-of-sql-parser/src/sql.lalrpop @@ -2,7 +2,7 @@ use crate::intermediate_ast; use crate::select_statement; use crate::identifier; use lalrpop_util::ParseError::User; -use crate::intermediate_decimal::IntermediateDecimal; +use crate::{intermediate_decimal::IntermediateDecimal, posql_time::timestamp::PoSQLTimestamp}; grammar; @@ -337,6 +337,10 @@ LiteralValue: Box = { }, => Box::new(intermediate_ast::Literal::Decimal(value)), + + => Box::new(intermediate_ast::Literal::Timestamp(value)), + + => 
Box::new(intermediate_ast::Literal::Timestamp(value)), }; Int128UnaryNumericLiteral: i128 = { @@ -373,6 +377,20 @@ pub BooleanLiteral: bool = { "false" => false, }; +TimestampLiteral: PoSQLTimestamp = { + "timestamp" =>? { + PoSQLTimestamp::try_from(content.trim_matches('\'').trim()) + .map_err(|_| User { error: "unable to parse timestamp from query" }) + }, +}; + +UnixTimestampLiteral: PoSQLTimestamp = { + // Handling the to_timestamp function with numeric input + "to_timestamp" "(" ")" =>? { + PoSQLTimestamp::to_timestamp(epoch).map_err(|_| User { error: "unable to parse timestamp from query" }) + }, +}; + //////////////////////////////////////////////////////////////////////////////////////////////// // Tokens //////////////////////////////////////////////////////////////////////////////////////////////// @@ -413,7 +431,9 @@ match { r"[sS][uU][mM]" => "sum", r"[tT][rR][uU][eE]" => "true", r"[fF][aA][lL][sS][eE]" => "false", - + r"[tT][iI][mM][eE][sS][tT][aA][mM][pP]" => "timestamp", + r"[tT][oO]_[tT][iI][mM][eE][sS][tT][aA][mM][pP]" => "to_timestamp", + "," => ",", "." 
=> ".", "(" => "(", diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index d6a0736e9..bd9a6808f 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -30,7 +30,7 @@ bumpalo = { workspace = true, features = ["collections"] } bytemuck = { workspace = true } byte-slice-cast = { workspace = true } curve25519-dalek = { workspace = true, features = ["serde"] } -chrono-tz = {workspace = true, features = ["serde"]} +chrono = {workspace = true, features = ["serde"]} derive_more = { workspace = true } dyn_partial_eq = { workspace = true } hashbrown = { workspace = true } diff --git a/crates/proof-of-sql/src/base/commitment/column_bounds.rs b/crates/proof-of-sql/src/base/commitment/column_bounds.rs index 0badc567f..2ee627088 100644 --- a/crates/proof-of-sql/src/base/commitment/column_bounds.rs +++ b/crates/proof-of-sql/src/base/commitment/column_bounds.rs @@ -288,13 +288,9 @@ impl ColumnBounds { #[cfg(test)] mod tests { use super::*; - use crate::base::{ - database::OwnedColumn, - math::decimal::Precision, - scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, - }; + use crate::base::{database::OwnedColumn, math::decimal::Precision, scalar::Curve25519Scalar}; use itertools::Itertools; + use proof_of_sql_parser::posql_time::{timezone, unit::PoSQLTimeUnit}; #[test] fn we_can_construct_bounds_by_method() { @@ -537,7 +533,7 @@ mod tests { let timestamp_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + timezone::PoSQLTimeZone::Utc, vec![1_i64, 2, 3, 4], ); let committable_timestamp_column = CommittableColumn::from(×tamp_column); diff --git a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs index 2e0d9876d..42b8e7fa9 100644 --- a/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs +++ 
b/crates/proof-of-sql/src/base/commitment/column_commitment_metadata.rs @@ -163,13 +163,15 @@ impl ColumnCommitmentMetadata { #[cfg(test)] mod tests { + use super::*; use crate::base::{ - commitment::column_bounds::Bounds, - database::OwnedColumn, - math::decimal::Precision, + commitment::column_bounds::Bounds, database::OwnedColumn, math::decimal::Precision, scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, + }; + use proof_of_sql_parser::posql_time::{ + timezone::{self, PoSQLTimeZone}, + unit::PoSQLTimeUnit, }; #[test] @@ -229,12 +231,12 @@ mod tests { assert_eq!( ColumnCommitmentMetadata::try_new( - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), ColumnBounds::TimestampTZ(Bounds::Empty), ) .unwrap(), ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), } ); @@ -372,7 +374,7 @@ mod tests { let timestamp_column: OwnedColumn = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + timezone::PoSQLTimeZone::Utc, [1i64, 2, 3, 4, 5].to_vec(), ); let committable_timestamp_column = CommittableColumn::from(×tamp_column); @@ -380,7 +382,7 @@ mod tests { ColumnCommitmentMetadata::from_column(&committable_timestamp_column); assert_eq!( timestamp_metadata.column_type(), - &ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + &ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); if let ColumnBounds::TimestampTZ(Bounds::Sharp(bounds)) = timestamp_metadata.bounds() { assert_eq!(bounds.min(), &1); @@ -534,7 +536,7 @@ mod tests { 1_625_072_400, 1_625_065_000, ]; - let timezone = PoSQLTimeZone::UTC; + let timezone = timezone::PoSQLTimeZone::Utc; let timeunit = PoSQLTimeUnit::Second; let timestamp_column_a = 
CommittableColumn::TimestampTZ(timeunit, timezone, ×[..2]); let timestamp_metadata_a = ColumnCommitmentMetadata::from_column(×tamp_column_a); @@ -560,7 +562,7 @@ mod tests { 1_625_072_400, 1_625_065_000, ]; - let timezone = PoSQLTimeZone::UTC; + let timezone = timezone::PoSQLTimeZone::Utc; let timeunit = PoSQLTimeUnit::Second; let timestamp_column_a = CommittableColumn::TimestampTZ(timeunit, timezone, ×[..2]); @@ -857,12 +859,18 @@ mod tests { .is_err()); let timestamp_tz_metadata_a = ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ( + PoSQLTimeUnit::Second, + timezone::PoSQLTimeZone::Utc, + ), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), }; let timestamp_tz_metadata_b = ColumnCommitmentMetadata { - column_type: ColumnType::TimestampTZ(PoSQLTimeUnit::Millisecond, PoSQLTimeZone::UTC), + column_type: ColumnType::TimestampTZ( + PoSQLTimeUnit::Millisecond, + timezone::PoSQLTimeZone::Utc, + ), bounds: ColumnBounds::TimestampTZ(Bounds::Empty), }; diff --git a/crates/proof-of-sql/src/base/commitment/committable_column.rs b/crates/proof-of-sql/src/base/commitment/committable_column.rs index e8fc9dc16..e7a006dae 100644 --- a/crates/proof-of-sql/src/base/commitment/committable_column.rs +++ b/crates/proof-of-sql/src/base/commitment/committable_column.rs @@ -3,10 +3,10 @@ use crate::base::{ math::decimal::Precision, ref_into::RefInto, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; #[cfg(feature = "blitzar")] use blitzar::sequence::Sequence; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; /// Column data in "committable form". 
/// @@ -194,10 +194,7 @@ impl<'a, 'b> From<&'a CommittableColumn<'b>> for Sequence<'a> { #[cfg(all(test, feature = "blitzar"))] mod tests { use super::*; - use crate::{ - base::{scalar::Curve25519Scalar, time::timezone::PoSQLTimeZone}, - proof_primitive::dory::DoryScalar, - }; + use crate::{base::scalar::Curve25519Scalar, proof_primitive::dory::DoryScalar}; use blitzar::compute::compute_curve25519_commitments; use curve25519_dalek::ristretto::CompressedRistretto; @@ -227,24 +224,24 @@ mod tests { fn we_can_get_type_and_length_of_timestamp_column() { // empty case let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]); assert_eq!(committable_column.len(), 0); assert!(committable_column.is_empty()); assert_eq!( committable_column.column_type(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); let committable_column = CommittableColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, &[12, 34, 56], ); assert_eq!(committable_column.len(), 3); assert!(!committable_column.is_empty()); assert_eq!( committable_column.column_type(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC) + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc) ); } @@ -393,12 +390,12 @@ mod tests { let from_borrowed_column = CommittableColumn::from(&Column::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, &[], )); assert_eq!( from_borrowed_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); // non-empty case @@ -406,12 +403,12 @@ mod tests { let from_borrowed_column = CommittableColumn::from(&Column::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + 
PoSQLTimeZone::Utc, ×tamps, )); assert_eq!( from_borrowed_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, ×tamps) ); } @@ -574,26 +571,26 @@ mod tests { // empty case let owned_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, Vec::new(), ); let from_owned_column = CommittableColumn::from(&owned_column); assert_eq!( from_owned_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); // non-empty case let timestamps = vec![1625072400, 1625076000, 1625083200]; let owned_column = OwnedColumn::::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, timestamps.clone(), ); let from_owned_column = CommittableColumn::from(&owned_column); assert_eq!( from_owned_column, - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps) + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, ×tamps) ); } @@ -880,7 +877,7 @@ mod tests { fn we_can_commit_to_timestamp_column_through_committable_column() { // Empty case let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]); let sequence = Sequence::from(&committable_column); let mut commitment_buffer = [CompressedRistretto::default()]; compute_curve25519_commitments(&mut commitment_buffer, &[sequence], 0); @@ -889,7 +886,7 @@ mod tests { // Non-empty case let timestamps = [1625072400, 1625076000, 1625083200]; let committable_column = - CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, ×tamps); + CommittableColumn::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, ×tamps); let sequence_actual = 
Sequence::from(&committable_column); let sequence_expected = Sequence::from(timestamps.as_slice()); diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index cc3b78cb8..8b4c1f712 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,11 +1,6 @@ use super::scalar_and_i256_conversions::convert_i256_to_scalar; use crate::{ - base::{ - database::Column, - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, - }, + base::{database::Column, math::decimal::Precision, scalar::Scalar}, sql::parse::ConversionError, }; use arrow::{ @@ -17,6 +12,10 @@ use arrow::{ datatypes::{i256, DataType, TimeUnit as ArrowTimeUnit}, }; use bumpalo::Bump; +use proof_of_sql_parser::{ + error::PoSQLTimestampError, + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, +}; use std::ops::Range; use thiserror::Error; @@ -38,9 +37,9 @@ pub enum ArrowArrayToColumnConversionError { /// Variant for conversion errors #[error("conversion error: {0}")] ConversionError(#[from] ConversionError), - /// Variant for timezone conversion errors, i.e. invalid timezone - #[error("Timezone conversion failed: {0}")] - TimezoneConversionError(String), + /// Using TimeError to handle all time-related errors + #[error(transparent)] + TimestampConversionError(#[from] PoSQLTimestampError), } /// This trait is used to provide utility functions to convert ArrayRefs into proof types (Column, Scalars, etc.) 
@@ -280,7 +279,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -293,7 +292,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Millisecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -306,7 +305,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Microsecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -319,7 +318,7 @@ impl ArrayRefExt for ArrayRef { if let Some(array) = self.as_any().downcast_ref::() { Ok(Column::TimestampTZ( PoSQLTimeUnit::Nanosecond, - PoSQLTimeZone::try_from(tz.clone())?, + PoSQLTimeZone::try_from(tz)?, &array.values()[range.start..range.end], )) } else { @@ -371,13 +370,13 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Z"), )); let result = array.to_column::(&alloc, &(1..3), None); assert_eq!( result.unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[1..3]) ); } @@ -387,7 +386,7 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("+00:00"), )); let result = array @@ -395,7 +394,7 @@ mod tests { .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + 
Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -405,13 +404,13 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("+0:00"), )); let result = array.to_column::(&alloc, &(1..1), None); assert_eq!( result.unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -421,7 +420,7 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("Utc"), )); let result = array.to_column::(&alloc, &(3..5), None); @@ -437,7 +436,7 @@ mod tests { let data = vec![Some(1625072400), None, Some(1625083200)]; let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.into(), - Some("UTC"), + Some("00:00"), )); let result = array.to_column::(&alloc, &(0..3), None); @@ -1004,7 +1003,7 @@ mod tests { .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[..]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[..]) ); } @@ -1058,7 +1057,7 @@ mod tests { let data = vec![1625072400, 1625076000, 1625083200]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Utc"), )); // Test using a range smaller than the array size @@ -1066,7 +1065,7 @@ mod tests { array .to_column::(&alloc, &(1..3), None) .unwrap(), - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &data[1..3]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &data[1..3]) ); } @@ -1117,14 +1116,14 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( 
data.clone().into(), - Some("UTC"), + Some("Utc"), )); let result = array .to_column::(&alloc, &(0..0), None) .unwrap(); assert_eq!( result, - Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC, &[]) + Column::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, &[]) ); } @@ -1146,7 +1145,7 @@ mod tests { let data = vec![1625072400, 1625076000]; // Example Unix timestamps let array: ArrayRef = Arc::new(TimestampSecondArray::with_timezone_opt( data.clone().into(), - Some("UTC"), + Some("Utc"), )); assert_eq!( diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index fb5dd908c..caea8938c 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -2,11 +2,13 @@ use super::{LiteralValue, TableRef}; use crate::base::{ math::decimal::{scale_scalar, Precision}, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; use bumpalo::Bump; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, +}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -334,9 +336,10 @@ impl From<&ColumnType> for DataType { } ColumnType::VarChar => DataType::Utf8, ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::TimestampTZ(timeunit, timezone) => { - DataType::Timestamp(ArrowTimeUnit::from(*timeunit), Some(Arc::from(timezone))) - } + ColumnType::TimestampTZ(timeunit, timezone) => DataType::Timestamp( + ArrowTimeUnit::from(*timeunit), + Some(Arc::from(timezone.to_string())), + ), } } } @@ -357,7 +360,7 @@ impl TryFrom for ColumnType { } DataType::Timestamp(time_unit, timezone_option) => Ok(ColumnType::TimestampTZ( PoSQLTimeUnit::from(time_unit), - 
PoSQLTimeZone::try_from(timezone_option)?, + PoSQLTimeZone::try_from(&timezone_option)?, )), DataType::Utf8 => Ok(ColumnType::VarChar), _ => Err(format!("Unsupported arrow data type {:?}", data_type)), @@ -471,9 +474,9 @@ mod tests { #[test] fn column_type_serializes_to_string() { - let column_type = ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC); + let column_type = ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc); let serialized = serde_json::to_string(&column_type).unwrap(); - assert_eq!(serialized, r#"{"TimestampTZ":["Second","UTC"]}"#); + assert_eq!(serialized, r#"{"TimestampTZ":["Second","Utc"]}"#); let column_type = ColumnType::Boolean; let serialized = serde_json::to_string(&column_type).unwrap(); diff --git a/crates/proof-of-sql/src/base/database/literal_value.rs b/crates/proof-of-sql/src/base/database/literal_value.rs index e7ced4b93..e75148b9f 100644 --- a/crates/proof-of-sql/src/base/database/literal_value.rs +++ b/crates/proof-of-sql/src/base/database/literal_value.rs @@ -1,9 +1,5 @@ -use crate::base::{ - database::ColumnType, - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::{database::ColumnType, math::decimal::Precision, scalar::Scalar}; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; use serde::{Deserialize, Serialize}; /// Represents a literal value. 
diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index e0710e0ed..808e075ed 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -20,7 +20,6 @@ use crate::base::{ }, math::decimal::Precision, scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use arrow::{ array::{ @@ -33,7 +32,11 @@ use arrow::{ record_batch::RecordBatch, }; use indexmap::IndexMap; -use proof_of_sql_parser::{Identifier, ParseError}; +use proof_of_sql_parser::{ + error::PoSQLTimestampError, + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, ParseError, +}; use std::sync::Arc; use thiserror::Error; @@ -56,12 +59,9 @@ pub enum OwnedArrowConversionError { /// This error occurs when trying to convert from an Arrow array with nulls. #[error("null values are not supported in OwnedColumn yet")] NullNotSupportedYet, - /// This error occurs when trying to convert from an unsupported timestamp unit. - #[error("unsupported timestamp unit: {0}")] - UnsupportedTimestampUnit(String), - /// This error occurs when trying to convert from an invalid timezone string. 
- #[error("invalid timezone string: {0}")] - InvalidTimezone(String), // New error variant for timezone strings + /// Using TimeError to handle all time-related errors + #[error(transparent)] + TimestampConversionError(#[from] PoSQLTimestampError), } impl From> for ArrayRef { @@ -193,15 +193,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Second".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -209,15 +207,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Millisecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Millisecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -225,15 +221,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - OwnedArrowConversionError::UnsupportedTimestampUnit( - "Microsecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Microsecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } @@ -241,15 +235,13 @@ impl TryFrom<&ArrayRef> for OwnedColumn { let array = value .as_any() .downcast_ref::() - .ok_or_else(|| { - 
OwnedArrowConversionError::UnsupportedTimestampUnit( - "Nanosecond".to_string(), - ) - })?; + .expect( + "This cannot fail, all Arrow TimeUnits are mapped to PoSQL TimeUnits", + ); let timestamps = array.values().iter().copied().collect::>(); Ok(OwnedColumn::TimestampTZ( PoSQLTimeUnit::Nanosecond, - PoSQLTimeZone::try_from(timezone.clone())?, + PoSQLTimeZone::try_from(timezone)?, timestamps, )) } diff --git a/crates/proof-of-sql/src/base/database/owned_column.rs b/crates/proof-of-sql/src/base/database/owned_column.rs index ce1a3e321..14eb9ba6a 100644 --- a/crates/proof-of-sql/src/base/database/owned_column.rs +++ b/crates/proof-of-sql/src/base/database/owned_column.rs @@ -3,11 +3,9 @@ /// converting to the final result in either Arrow format or JSON. /// This is the analog of an arrow Array. use super::ColumnType; -use crate::base::{ - math::decimal::Precision, - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::{math::decimal::Precision, scalar::Scalar}; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; + #[derive(Debug, PartialEq, Clone, Eq)] #[non_exhaustive] /// Supported types for OwnedColumn diff --git a/crates/proof-of-sql/src/base/database/owned_table_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test.rs index adbec791c..2abdf869a 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test.rs @@ -2,12 +2,14 @@ use crate::{ base::{ database::{owned_table_utility::*, OwnedColumn, OwnedTable, OwnedTableError}, scalar::Curve25519Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }, proof_primitive::dory::DoryScalar, }; use indexmap::IndexMap; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}, + Identifier, +}; #[test] fn we_can_create_an_owned_table_with_no_columns() { @@ -58,18 +60,18 @@ fn 
we_can_create_an_owned_table_with_data() { [true, false, true, false, true, false, true, false, true], ), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX], ), ]); let mut table = IndexMap::new(); table.insert( - Identifier::try_new("timestamp").unwrap(), + Identifier::try_new("time_stamp").unwrap(), OwnedColumn::TimestampTZ( PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0, 1, 2, 3, 4, 5, 6, i64::MIN, i64::MAX].into(), ), ); @@ -125,9 +127,9 @@ fn we_get_inequality_between_tables_with_differing_column_order() { varchar("c", ["0"; 0]), boolean("d", [false; 0]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0; 0], ), ]); @@ -137,9 +139,9 @@ fn we_get_inequality_between_tables_with_differing_column_order() { bigint("a", [0; 0]), varchar("c", ["0"; 0]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [0; 0], ), ]); @@ -153,9 +155,9 @@ fn we_get_inequality_between_tables_with_differing_data() { varchar("c", ["0"]), boolean("d", [true]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [1625072400], ), ]); @@ -165,9 +167,9 @@ fn we_get_inequality_between_tables_with_differing_data() { varchar("c", ["0"]), boolean("d", [true]), timestamptz( - "timestamp", + "time_stamp", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [1625076000], ), ]); diff --git a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs index 5364cbb99..7c211eb94 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_test_accessor_test.rs @@ -5,9 +5,9 @@ use super::{ use crate::base::{ 
database::owned_table_utility::*, scalar::{compute_commitment_for_testing, Curve25519Scalar}, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, }; use blitzar::proof::InnerProductProof; +use proof_of_sql_parser::posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}; #[test] fn we_can_query_the_length_of_a_table() { @@ -52,7 +52,7 @@ fn we_can_access_the_columns_of_a_table() { timestamptz( "time", PoSQLTimeUnit::Second, - PoSQLTimeZone::UTC, + PoSQLTimeZone::Utc, [4, 5, 6, 5], ), ]); @@ -110,7 +110,7 @@ fn we_can_access_the_columns_of_a_table() { let column = ColumnRef::new( table_ref_2, "time".parse().unwrap(), - ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::UTC), + ColumnType::TimestampTZ(PoSQLTimeUnit::Second, PoSQLTimeZone::Utc), ); match accessor.get_column(column) { Column::TimestampTZ(_, _, col) => assert_eq!(col.to_vec(), vec![4, 5, 6, 5]), diff --git a/crates/proof-of-sql/src/base/database/owned_table_utility.rs b/crates/proof-of-sql/src/base/database/owned_table_utility.rs index 7867e918e..4f77f933d 100644 --- a/crates/proof-of-sql/src/base/database/owned_table_utility.rs +++ b/crates/proof-of-sql/src/base/database/owned_table_utility.rs @@ -14,12 +14,12 @@ //! ]); //! ``` use super::{OwnedColumn, OwnedTable}; -use crate::base::{ - scalar::Scalar, - time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone}, -}; +use crate::base::scalar::Scalar; use core::ops::Deref; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{ + posql_time::{timezone, unit::PoSQLTimeUnit}, + Identifier, +}; /// Creates an OwnedTable from a list of (Identifier, OwnedColumn) pairs. 
/// This is a convenience wrapper around OwnedTable::try_from_iter primarily for use in tests and @@ -212,18 +212,18 @@ pub fn decimal75( /// ``` /// use proof_of_sql::base::{database::owned_table_utility::*, /// scalar::Curve25519Scalar, -/// time::{timestamp::PoSQLTimeUnit, timezone::PoSQLTimeZone} /// }; -/// use chrono_tz::Europe::London; +/// use proof_of_sql_parser::{ +/// posql_time::{timezone::PoSQLTimeZone, unit::PoSQLTimeUnit}}; /// /// let result = owned_table::([ -/// timestamptz("event_time", PoSQLTimeUnit::Second, PoSQLTimeZone::new(London), vec![1625072400, 1625076000, 1625079600]), +/// timestamptz("event_time", PoSQLTimeUnit::Second, PoSQLTimeZone::Utc, vec![1625072400, 1625076000, 1625079600]), /// ]); /// ``` pub fn timestamptz( name: impl Deref, time_unit: PoSQLTimeUnit, - timezone: PoSQLTimeZone, + timezone: timezone::PoSQLTimeZone, data: impl IntoIterator, ) -> (Identifier, OwnedColumn) { ( diff --git a/crates/proof-of-sql/src/base/database/record_batch_utility.rs b/crates/proof-of-sql/src/base/database/record_batch_utility.rs index 7c67c8f7c..7cccd6424 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/database/record_batch_utility.rs @@ -1,8 +1,8 @@ -use crate::base::time::timestamp::{PoSQLTimeUnit, Time}; use arrow::array::{ TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, }; +use proof_of_sql_parser::posql_time::unit::PoSQLTimeUnit; use std::sync::Arc; /// Extension trait for Vec to convert it to an Arrow array @@ -23,6 +23,16 @@ impl ToArrow for Vec { } } +/// A wrapper around i64 to mitigate conflicting From +/// implementations +#[derive(Clone)] +pub struct Time { + /// i64 count of timeunits since unix epoch + pub timestamp: i64, + /// Timeunit of this time + pub unit: PoSQLTimeUnit, +} + impl ToArrow for Vec