Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: timestamp literal support #22

Closed
wants to merge 30 commits into from
Closed
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
e06f41e
feat: initial support for timestamp
Dustin-Ray Jun 13, 2024
24ef423
fix: cover dory commitment helper gpu matcharm
Dustin-Ray Jun 13, 2024
80f585c
feat: init timestamp support
Dustin-Ray Jun 18, 2024
1ec82de
feat: tests for timezone and timeunit conversions
Dustin-Ray Jun 18, 2024
80237d5
feat: update typing and Scalar bounds
Dustin-Ray Jun 18, 2024
aa82589
Merge branch 'main' into feat/timestamp
Dustin-Ray Jun 18, 2024
aae72a7
feat: support TimeStamp
Dustin-Ray Jun 18, 2024
a1870fd
fix: rename to align with postgres
Dustin-Ray Jun 18, 2024
5e0f468
fix: doctest
Dustin-Ray Jun 18, 2024
9211577
Merge branch 'main' into feat/timestamp
Dustin-Ray Jun 18, 2024
5989ae6
fix: breaking change and update to main
Dustin-Ray Jun 18, 2024
a2a04a1
fix: fmt
Dustin-Ray Jun 18, 2024
0e045bc
feat: add unit tests
Dustin-Ray Jun 18, 2024
6ed1218
Merge branch 'main' into feat/timestamp
Dustin-Ray Jun 18, 2024
b8ed589
fix: fmt
Dustin-Ray Jun 18, 2024
d21e1e8
feat: add tests
Dustin-Ray Jun 19, 2024
cca6f73
feat: init literal timestamp
Dustin-Ray Jun 20, 2024
bd8dbd1
feat: grammar, lexer, and parsing support for timestamp and timestamptz
Dustin-Ray Jun 20, 2024
606f00a
fix: idiomatic parsing
Dustin-Ray Jun 21, 2024
3c50102
Merge branch 'main' into feat/timestamp-literal-support
Dustin-Ray Jun 21, 2024
fe56393
fix: rebase
Dustin-Ray Jun 21, 2024
0dc7578
fix: deps and diff
Dustin-Ray Jun 21, 2024
8d2c3e0
fix: clippy
Dustin-Ray Jun 21, 2024
998f3bc
fix: refactor errors
Dustin-Ray Jun 21, 2024
de6ad9f
fix: error and timestamp module refactor
Dustin-Ray Jun 21, 2024
5d15ca1
fix: refactor timeunit
Dustin-Ray Jun 21, 2024
1648dc0
fix: error refactor
Dustin-Ray Jun 21, 2024
680d9c0
Merge branch 'main' into feat/timestamp-literal-support
Dustin-Ray Jun 21, 2024
8e467bc
fix: timestamp type
Dustin-Ray Jun 21, 2024
cc3421f
Merge branch 'main' into feat/timestamp-literal-support
Dustin-Ray Jun 21, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ byte-slice-cast = { version = "1.2.1" }
clap = { version = "4.5.4" }
criterion = { version = "0.5.1" }
chrono-tz = {version = "0.9.0", features = ["serde"]}
chrono = { version = "0.4.38" }
curve25519-dalek = { version = "4", features = ["rand_core"] }
derive_more = { version = "0.99" }
dyn_partial_eq = { version = "0.1.2" }
Expand All @@ -54,6 +55,7 @@ rayon = { version = "1.5" }
serde = { version = "1" }
serde_json = { version = "1" }
thiserror = { version = "1" }
time = {version = "0.3.36", features = ["serde"]}
tracing = { version = "0.1.36" }
tracing-opentelemetry = { version = "0.22.0" }
tracing-subscriber = { version = "0.3.0" }
Expand Down
2 changes: 2 additions & 0 deletions crates/proof-of-sql-parser/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ test = true
[dependencies]
arrayvec = { workspace = true, features = ["serde"] }
bigdecimal = { workspace = true }
chrono = { workspace = true }
time = { workspace = true }
lalrpop-util = { workspace = true, features = ["lexer", "unicode"] }
serde = { workspace = true, features = ["serde_derive"] }
thiserror = { workspace = true }
Expand Down
6 changes: 5 additions & 1 deletion crates/proof-of-sql-parser/src/intermediate_ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
* https://docs.rs/vervolg/latest/vervolg/ast/enum.Statement.html
***/

use crate::{intermediate_decimal::IntermediateDecimal, Identifier};
use crate::{
intermediate_decimal::IntermediateDecimal, intermediate_time::IntermediateTimeStamp, Identifier,
};
use serde::{Deserialize, Serialize};

/// Representation of a SetExpression, a collection of rows, each having one or more columns.
Expand Down Expand Up @@ -328,6 +330,8 @@ pub enum Literal {
VarChar(String),
/// Decimal Literal
Decimal(IntermediateDecimal),
/// Timestamp Literal
TimestampTZ(IntermediateTimeStamp),
}

impl From<bool> for Literal {
Expand Down
320 changes: 320 additions & 0 deletions crates/proof-of-sql-parser/src/intermediate_time.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
use chrono::{DateTime, NaiveDateTime, Offset, TimeZone, Utc};
use core::fmt;
use serde::{Deserialize, Serialize};
use thiserror::Error;

/// Precision attached to a parsed timestamp literal.
///
/// `parse_intermediate_timestamp` picks the variant from the number of
/// fractional-second digits written in the literal.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum IntermediateTimeUnit {
/// No fractional digits were present.
Second,
/// One to three fractional digits were present.
Millisecond,
/// Four to six fractional digits were present.
Microsecond,
/// Seven or more fractional digits were present.
Nanosecond,
}

/// Renders the unit as its variant name (`Second`, `Millisecond`, ...).
impl fmt::Display for IntermediateTimeUnit {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Resolve the label first, then emit it with a single write.
        let label = match self {
            IntermediateTimeUnit::Second => "Second",
            IntermediateTimeUnit::Millisecond => "Millisecond",
            IntermediateTimeUnit::Microsecond => "Microsecond",
            IntermediateTimeUnit::Nanosecond => "Nanosecond",
        };
        f.write_str(label)
    }
}

/// Timezone attached to a parsed timestamp literal: either UTC or a fixed
/// offset from UTC.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum IntermediateTimeZone {
/// Coordinated Universal Time (an offset of zero).
Utc,
/// Fixed offset from UTC in seconds; the parser stores local time minus
/// UTC, so `+02:00` becomes `7200` and `-05:00` becomes `-18000`.
FixedOffset(i32), // Offset in seconds
}

impl IntermediateTimeZone {
    /// Builds a timezone from an offset in seconds.
    ///
    /// A zero offset is normalised to [`IntermediateTimeZone::Utc`]; every
    /// other value is wrapped unchanged in
    /// [`IntermediateTimeZone::FixedOffset`].
    pub fn from_offset(offset: i32) -> Self {
        match offset {
            0 => IntermediateTimeZone::Utc,
            seconds => IntermediateTimeZone::FixedOffset(seconds),
        }
    }
}

/// Renders the timezone as `Z` for UTC (or a zero offset) and as a signed
/// `±HH:MM` string for any other fixed offset.
impl fmt::Display for IntermediateTimeZone {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let offset = match *self {
            IntermediateTimeZone::Utc | IntermediateTimeZone::FixedOffset(0) => {
                return write!(f, "Z");
            }
            IntermediateTimeZone::FixedOffset(seconds) => seconds,
        };

        // Work on the magnitude so the sign is decided once, from the whole
        // offset. Taking it from the truncated hour component would print
        // offsets such as -00:30 as "+00:30". `unsigned_abs` is also safe
        // for `i32::MIN`.
        let total_minutes = offset.unsigned_abs() / 60;
        // Seconds are truncated; an offset shorter than a minute therefore
        // prints as "+00:00" regardless of its sign.
        let sign = if offset < 0 && total_minutes != 0 {
            '-'
        } else {
            '+'
        };
        write!(
            f,
            "{}{:02}:{:02}",
            sign,
            total_minutes / 60,
            total_minutes % 60
        )
    }
}

/// Error returned when a string cannot be converted into an
/// `IntermediateTimeStamp` via `TryFrom<&str>`.
#[derive(Debug, Error)]
pub enum TimeParseError {
/// The conversion failed; every failure reported by
/// `parse_intermediate_timestamp` is mapped to this variant.
#[error("Invalid timestamp format")]
InvalidFormat,
}

/// A timestamp literal as handed over by the parser: an epoch value plus the
/// precision and timezone that were written in the literal.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub struct IntermediateTimeStamp {
/// Epoch time. `parse_intermediate_timestamp` always fills this with
/// nanoseconds since the Unix epoch, independent of `unit`.
pub timestamp: i64, // Use epoch time in the smallest unit (nanoseconds)
/// Precision derived from the number of fractional-second digits in the
/// parsed literal.
pub unit: IntermediateTimeUnit, // The unit of the timestamp value
/// Offset written in the literal, or UTC when the literal carried none.
pub timezone: IntermediateTimeZone, // The timezone of the timestamp
}

/// Fallible conversion from a textual timestamp literal.
impl TryFrom<&str> for IntermediateTimeStamp {
    type Error = TimeParseError;

    /// Parses `value` with [`parse_intermediate_timestamp`].
    ///
    /// # Errors
    ///
    /// Every parser failure, whatever its message, is reported as
    /// [`TimeParseError::InvalidFormat`].
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        match parse_intermediate_timestamp(value) {
            Ok(parsed) => Ok(parsed),
            Err(_) => Err(TimeParseError::InvalidFormat),
        }
    }
}

/// Parses a timestamp literal of the form `YYYY-MM-DD HH:MM:SS[.fraction][±HH:MM]`.
///
/// The literal is first tried with a trailing UTC offset; if that does not
/// match, it is parsed without one and interpreted as UTC. The returned
/// `timestamp` is always nanoseconds since the Unix epoch, while `unit`
/// reflects how many fractional-second digits the literal carried.
///
/// # Errors
///
/// * `"Invalid timestamp format"` when neither layout matches.
/// * `"Failed to convert datetime to nanoseconds"` when the instant cannot be
///   represented as an `i64` nanosecond count.
pub fn parse_intermediate_timestamp(ts: &str) -> Result<IntermediateTimeStamp, &'static str> {
    const LAYOUT_WITH_OFFSET: &str = "%Y-%m-%d %H:%M:%S%.f%:z";
    const LAYOUT_NAIVE: &str = "%Y-%m-%d %H:%M:%S%.f";
    const NANOS_UNREPRESENTABLE: &str = "Failed to convert datetime to nanoseconds";

    // Text after the first '.', cut at the sign that starts a UTC offset.
    // Returns an empty slice when the literal has no fractional part.
    fn fractional_digits(literal: &str) -> &str {
        match literal.split_once('.') {
            Some((_, tail)) => {
                let end = tail
                    .find(|c: char| c == '+' || c == '-')
                    .unwrap_or(tail.len());
                &tail[..end]
            }
            None => "",
        }
    }

    // Maps the number of fractional digits onto the matching precision.
    fn unit_for(digit_count: usize) -> IntermediateTimeUnit {
        match digit_count {
            0 => IntermediateTimeUnit::Second,
            1..=3 => IntermediateTimeUnit::Millisecond,
            4..=6 => IntermediateTimeUnit::Microsecond,
            _ => IntermediateTimeUnit::Nanosecond,
        }
    }

    let (timestamp, timezone) = if let Ok(zoned) = DateTime::parse_from_str(ts, LAYOUT_WITH_OFFSET)
    {
        // Offset-bearing literal: keep the offset that was written.
        let nanos = zoned.timestamp_nanos_opt().ok_or(NANOS_UNREPRESENTABLE)?;
        let offset_seconds = zoned.offset().fix().local_minus_utc();
        (nanos, IntermediateTimeZone::from_offset(offset_seconds))
    } else if let Ok(naive) = NaiveDateTime::parse_from_str(ts, LAYOUT_NAIVE) {
        // No offset in the literal: the wall-clock value is taken as UTC.
        let nanos = Utc
            .from_utc_datetime(&naive)
            .timestamp_nanos_opt()
            .ok_or(NANOS_UNREPRESENTABLE)?;
        (nanos, IntermediateTimeZone::Utc)
    } else {
        return Err("Invalid timestamp format");
    };

    Ok(IntermediateTimeStamp {
        timestamp,
        unit: unit_for(fractional_digits(ts).len()),
        timezone,
    })
}

#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{FixedOffset, TimeZone, Timelike, Utc};

    /// Epoch nanoseconds of 2024-06-20 12:34:56 plus `nanos`, read as local
    /// time at `offset_seconds` east of UTC.
    fn reference_nanos(offset_seconds: i32, nanos: u32) -> i64 {
        FixedOffset::east_opt(offset_seconds)
            .unwrap()
            .with_ymd_and_hms(2024, 6, 20, 12, 34, 56)
            .unwrap()
            .with_nanosecond(nanos)
            .unwrap()
            .timestamp_nanos_opt()
            .unwrap()
    }

    #[test]
    fn test_display_intermediate_timezone() {
        // The dedicated UTC variant.
        assert_eq!(IntermediateTimeZone::Utc.to_string(), "Z");

        // (offset in seconds, expected rendering)
        let cases = [
            // positive offsets
            (3600, "+01:00"),
            (19800, "+05:30"),
            (3600 * 12, "+12:00"),
            // negative offsets
            (-3600, "-01:00"),
            (-12600, "-03:30"),
            (-3600 * 12, "-12:00"),
            // edge cases
            (0, "Z"),
            (3600 * 14, "+14:00"),
            (-3600 * 14, "-14:00"),
        ];
        for &(seconds, rendered) in cases.iter() {
            assert_eq!(
                IntermediateTimeZone::FixedOffset(seconds).to_string(),
                rendered
            );
        }
    }

    #[test]
    fn test_parse_with_timezone() {
        // The precision follows the number of fractional digits.
        let precision_cases = [
            ("2024-06-20 12:34:56+02:00", IntermediateTimeUnit::Second),
            (
                "2024-06-20 12:34:56.123+02:00",
                IntermediateTimeUnit::Millisecond,
            ),
            (
                "2024-06-20 12:34:56.123456+02:00",
                IntermediateTimeUnit::Microsecond,
            ),
            (
                "2024-06-20 12:34:56.123456789+02:00",
                IntermediateTimeUnit::Nanosecond,
            ),
        ];
        for &(literal, unit) in precision_cases.iter() {
            let parsed = parse_intermediate_timestamp(literal)
                .expect("Failed to parse timestamp with timezone");
            assert_eq!(parsed.unit, unit);
        }

        // Full check of offset and instant on the nanosecond literal.
        let parsed = parse_intermediate_timestamp("2024-06-20 12:34:56.123456789+02:00")
            .expect("Failed to parse timestamp with timezone");
        assert_eq!(parsed.timezone, IntermediateTimeZone::FixedOffset(7200)); // +02:00
        assert_eq!(parsed.timestamp, reference_nanos(7200, 123_456_789));
    }

    #[test]
    fn test_parse_without_timezone() {
        // Without an offset every literal is read as UTC.
        let precision_cases = [
            ("2024-06-20 12:34:56", IntermediateTimeUnit::Second),
            ("2024-06-20 12:34:56.123", IntermediateTimeUnit::Millisecond),
            (
                "2024-06-20 12:34:56.123456",
                IntermediateTimeUnit::Microsecond,
            ),
            (
                "2024-06-20 12:34:56.123456789",
                IntermediateTimeUnit::Nanosecond,
            ),
        ];
        for &(literal, unit) in precision_cases.iter() {
            let parsed = parse_intermediate_timestamp(literal)
                .expect("Failed to parse timestamp without timezone");
            assert_eq!(parsed.unit, unit);
            assert_eq!(parsed.timezone, IntermediateTimeZone::Utc);
        }

        let parsed = parse_intermediate_timestamp("2024-06-20 12:34:56.123456789")
            .expect("Failed to parse timestamp without timezone");
        let expected = Utc
            .with_ymd_and_hms(2024, 6, 20, 12, 34, 56)
            .unwrap()
            .with_nanosecond(123_456_789)
            .unwrap();
        assert_eq!(
            parsed.timestamp,
            expected.timestamp_nanos_opt().unwrap()
        );
    }

    #[test]
    fn test_parse_invalid_format() {
        let outcome = parse_intermediate_timestamp("invalid timestamp");
        assert_eq!(outcome, Err("Invalid timestamp format"));
    }

    #[test]
    fn test_parse_missing_fractional_seconds() {
        let parsed = parse_intermediate_timestamp("2024-06-20 12:34:56+02:00")
            .expect("Failed to parse timestamp without fractional seconds");

        assert_eq!(parsed.unit, IntermediateTimeUnit::Second);
        assert_eq!(parsed.timezone, IntermediateTimeZone::FixedOffset(7200));
        assert_eq!(parsed.timestamp, reference_nanos(7200, 0));
    }

    #[test]
    fn test_parse_different_timezones() {
        // (literal, offset in seconds)
        let timezones = [
            ("2024-06-20 12:34:56.123456789-05:00", -18000),
            ("2024-06-20 12:34:56.123456789+00:00", 0),
            ("2024-06-20 12:34:56.123456789+05:30", 19800),
            ("2024-06-20 12:34:56.123456789-08:00", -28800),
            ("2024-06-20 12:34:56.123456789+09:00", 32400),
            ("2024-06-20 12:34:56.123456789-03:30", -12600),
            ("2024-06-20 12:34:56.123456789+12:00", 43200),
            ("2024-06-20 12:34:56.123456789-12:00", -43200),
        ];

        for &(ts, offset_seconds) in timezones.iter() {
            let parsed = match parse_intermediate_timestamp(ts) {
                Ok(parsed) => parsed,
                Err(_) => panic!("Failed to parse timestamp with timezone {}", ts),
            };

            assert_eq!(parsed.unit, IntermediateTimeUnit::Nanosecond);
            assert_eq!(
                parsed.timezone,
                IntermediateTimeZone::from_offset(offset_seconds)
            );
            assert_eq!(
                parsed.timestamp,
                reference_nanos(offset_seconds, 123_456_789)
            );
        }
    }
}
1 change: 1 addition & 0 deletions crates/proof-of-sql-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

/// Module for handling an intermediate decimal type received from the lexer.
pub mod intermediate_decimal;
mod intermediate_time;
#[macro_use]
extern crate lalrpop_util;

Expand Down
Loading
Loading