From 668fc7096ea2387be4bb2e87ee2f63cd365777a7 Mon Sep 17 00:00:00 2001 From: Spencer Nelson Date: Mon, 28 Aug 2023 13:22:13 -0700 Subject: [PATCH] Calculate differences between timestamps, and use for equality --- adam_core/time/tests/test_time.py | 103 ++++++++++++++ adam_core/time/time.py | 217 +++++++++++++++++++++++++++--- 2 files changed, 300 insertions(+), 20 deletions(-) diff --git a/adam_core/time/tests/test_time.py b/adam_core/time/tests/test_time.py index 50752990..58082858 100644 --- a/adam_core/time/tests/test_time.py +++ b/adam_core/time/tests/test_time.py @@ -1,6 +1,7 @@ import astropy.time import astropy.units import numpy.testing as npt +import pyarrow.compute as pc import pytest import quivr as qv @@ -334,3 +335,105 @@ def test_add_days(self): have = self.t1.add_days([1, 2, 3]) assert have.days.to_pylist() == [1, 50002, 60003] assert have.nanos.to_pylist() == self.t1.nanos.to_pylist() + + def test_equals_array(self): + t1 = self.t1 + t2 = self.t1 + + assert pc.all(t1.equals_array(t2)).as_py() + + t3 = t2.add_nanos(1) + assert not pc.all(t1.equals_array(t3)).as_py() + assert pc.all(t1.equals_array(t3, precision="us")).as_py() + + t4 = time.Timestamp.from_kwargs( + days=t1.days, + nanos=t1.nanos, + scale="utc", + ) + with pytest.raises(ValueError): + t1.equals_array(t4) + + def test_equals_scalar(self): + t1 = time.Timestamp.from_kwargs( + days=[50000, 60000, 70000], + nanos=[0, 1, 2], + ) + + have = t1.equals_scalar(days=50000, nanos=0) + assert have.to_pylist() == [True, False, False] + + def test_equals_scalar_precision(self): + t1 = time.Timestamp.from_kwargs( + days=[0, 0, 1, 1, 2, 2], + nanos=[ + 500, + 86400_000_000_000 - 500, + 500, + 86400_000_000_000 - 500, + 500, + 86400_000_000_000 - 500, + ], + ) + have = t1.equals_scalar(days=1, nanos=0, precision="us") + assert have.to_pylist() == [False, True, True, False, False, False] + + def test_difference_scalar(self): + # Compute difference from days=1, nanos=100 + cases = [ + { + "in_days": 0, 
+ "in_nanos": 0, + "out_days": -2, + "out_nanos": 86400_000_000_000 - 100, + }, + { + "in_days": 0, + "in_nanos": 50, + "out_days": -2, + "out_nanos": 86400_000_000_000 - 50, + }, + {"in_days": 0, "in_nanos": 200, "out_days": -1, "out_nanos": 100}, + { + "in_days": 1, + "in_nanos": 50, + "out_days": -1, + "out_nanos": 86400_000_000_000 - 50, + }, + {"in_days": 1, "in_nanos": 100, "out_days": 0, "out_nanos": 0}, + {"in_days": 1, "in_nanos": 200, "out_days": 0, "out_nanos": 100}, + { + "in_days": 2, + "in_nanos": 50, + "out_days": 0, + "out_nanos": 86400_000_000_000 - 50, + }, + {"in_days": 2, "in_nanos": 100, "out_days": 1, "out_nanos": 0}, + {"in_days": 2, "in_nanos": 200, "out_days": 1, "out_nanos": 100}, + ] + + for (i, c) in enumerate(cases): + t1 = time.Timestamp.from_kwargs( + days=[c["in_days"]], + nanos=[c["in_nanos"]], + ) + have_days, have_nanos = t1.difference_scalar(days=1, nanos=100) + assert have_days[0].as_py() == c["out_days"], f"case {i}" + assert have_nanos[0].as_py() == c["out_nanos"], f"case {i}" + + def test_difference(self): + t1 = time.Timestamp.from_kwargs( + days=[50000, 60000, 70000], + nanos=[0, 1, 2], + ) + t2 = time.Timestamp.from_kwargs( + days=[50000, 60000, 70000], + nanos=[100, 200, 300], + ) + have_days, have_nanos = t1.difference(t2) + assert have_days.to_pylist() == [-1, -1, -1] + assert have_nanos.to_pylist() == [ + 86400_000_000_000 - 100, + 86400_000_000_000 - 199, + 86400_000_000_000 - 298, + ] diff --git a/adam_core/time/time.py b/adam_core/time/time.py index feb415b8..740656de 100644 --- a/adam_core/time/time.py +++ b/adam_core/time/time.py @@ -64,35 +64,55 @@ def equals(self, other: Timestamp, precision: str = "ns") -> pa.BooleanArray: def equals_scalar( self, days: int, nanos: int, precision: str = "ns" ) -> pa.BooleanArray: - days_equal = pc.equal(self.days, days) + delta_days, delta_nanos = self.difference_scalar(days, nanos) if precision == "ns": - times_equal = pc.equal(self.nanos, nanos) + max_deviation = 0 elif 
precision == "us": - times_equal = pc.equal(self.micros(), nanos // 1_000) + max_deviation = 999 elif precision == "ms": - times_equal = pc.equal(self.millis(), nanos // 1_000_000) + max_deviation = 999_999 elif precision == "s": - times_equal = pc.equal(self.seconds(), nanos // 1_000_000_000) + max_deviation = 999_999_999 else: raise ValueError(f"Unsupported precision: {precision}") - return pc.and_(days_equal, times_equal) + return _duration_arrays_within_tolerance(delta_days, delta_nanos, max_deviation) def equals_array(self, other: Timestamp, precision: str = "ns") -> pa.BooleanArray: - days_equal = pc.equal(self.days, other.days) + """ + Compare two Timestamps, returning a BooleanArray indicating + whether each element is equal. + + The Timestamps must have the same scale, and the same length. + """ + if self.scale != other.scale: + raise ValueError("Cannot compare timestamps with different scales") + if len(self) != len(other): + raise ValueError("Timestamps must have the same length") + + delta_days, delta_nanos = self.difference(other) if precision == "ns": - times_equal = pc.equal(self.nanos, other.nanos) + max_deviation = 0 elif precision == "us": - times_equal = pc.equal(self.micros(), other.micros()) + max_deviation = 999 elif precision == "ms": - times_equal = pc.equal(self.millis(), other.millis()) + max_deviation = 999_999 elif precision == "s": - times_equal = pc.equal(self.seconds(), other.seconds()) + max_deviation = 999_999_999 else: raise ValueError(f"Unsupported precision: {precision}") - return pc.and_(days_equal, times_equal) + return _duration_arrays_within_tolerance(delta_days, delta_nanos, max_deviation) @classmethod def from_astropy(cls, astropy_time: astropy.time.Time) -> Timestamp: + """Convert an astropy time to a quivr timestamp. + + This is a lossy conversion, since astropy uses floating point + to represent times, while quivr uses integers. + + The astropy time must use a scale supported by quivr. 
The + supported scales are "tai", "tt", "ut1", "utc", and "tdb". + + """ if astropy_time.scale not in SCALES: raise ValueError(f"Unsupported scale: {astropy_time.scale}") if astropy_time.isscalar: @@ -143,6 +163,9 @@ def _from_astropy_scalar(cls, astropy_time: astropy.time.Time) -> Timestamp: ) def to_astropy(self) -> astropy.time.Time: + """ + Convert the timestamp to an astropy time. + """ fractional_days = self.fractional_days() return astropy.time.Time( val=self.days, @@ -155,16 +178,17 @@ def add_nanos( self, nanos: pa.lib.Int64Array | int, check_range: bool = True ) -> Timestamp: """ - Add nanoseconds to the timestamp. + Add nanoseconds to the timestamp. Negative nanoseconds are + allowed. - Args: - nanos: The nanoseconds to add. Can be a scalar or an array of + Parameters + ---------- + nanos : The nanoseconds to add. Can be a scalar or an array of the same length as the timestamp. Must be in the range [-86400e9, 86400e9). - check_range: If True, check that the nanoseconds are in the + check_range : If True, check that the nanoseconds are in the range [-86400e9, 86400e9). If False, the caller is responsible for ensuring that the nanoseconds are in the correct range. - """ if check_range: if isinstance(nanos, int): @@ -202,19 +226,172 @@ def add_nanos( def add_seconds(self, seconds: pa.lib.Int64Array | int) -> Timestamp: """ - Add seconds to the timestamp. + Add seconds to the timestamp. Negative seconds are supported. - Args: - seconds: The seconds to add. Can be a scalar or an array of + Parameters + ---------- + seconds : The seconds to add. Can be a scalar or an array of the same length as the timestamp. Must be in the range [-86400, 86400). + + See Also + -------- + add_nanos : Add nanoseconds to the timestamp. This method includes + a 'check_range' parameter that allows the caller to disable range + checking for performance reasons. 
""" return self.add_nanos(pc.multiply(seconds, 1_000_000_000)) def add_millis(self, millis: pa.lib.Int64Array | int) -> Timestamp: + """ + Add milliseconds to the timestamp. Negative milliseconds are + supported. + + Parameters + ---------- + millis : The milliseconds to add. Can be a scalar or an array of + the same length as the timestamp. Must be in the range [-86400e3, 86400e3). + + See Also + -------- + add_nanos : Add nanoseconds to the timestamp. This method includes + a 'check_range' parameter that allows the caller to disable range + checking for performance reasons. + """ return self.add_nanos(pc.multiply(millis, 1_000_000)) def add_micros(self, micros: pa.lib.Int64Array | int) -> Timestamp: + """ + Add microseconds to the timestamp. Negative microseconds are + supported. + + Parameters + ---------- + micros : The microseconds to add. Can be a scalar or an array of + the same length as the timestamp. Must be in the range [-86400e6, 86400e6). + + See Also + -------- + add_nanos : Add nanoseconds to the timestamp. This method includes + a 'check_range' parameter that allows the caller to disable range + checking for performance reasons. + """ return self.add_nanos(pc.multiply(micros, 1_000)) def add_days(self, days: pa.lib.Int64Array | int) -> Timestamp: + """Add days to the timestamp. + + Parameters + ---------- + days : The days to add. Can be a scalar or an array of the + same length as the timestamp. Use negative values to + subtract days. + + """ return self.set_column("days", pc.add(self.days, days)) + + def difference_scalar( + self, days: int, nanos: int + ) -> tuple[pa.Int64Array, pa.Int64Array]: + """ + Compute the difference between this timestamp and a scalar + timestamp. + + The difference is computed as (self - scalar). The result is + presented as a tuple of (days, nanos). The nanos value is + always non-negative, in the range [0, 86400e9). + + Parameters + ---------- + days : The days of the scalar timestamp. 
+ nanos : The nanoseconds of the scalar timestamp. + + Returns + ------- + days : The difference in days. This value can be negative. + nanos : The difference in nanoseconds. This value is always + non-negative, in the range [0, 86400e9). + + Examples + -------- + >>> from adam_core.time.time import Timestamp + >>> ts = Timestamp.from_kwargs(days=[0, 1, 2], nanos=[200, 0, 100]) + >>> have_days, have_nanos = ts.difference_scalar(1, 100) + >>> have_days.to_numpy() + array([-1, -1, 1]) + >>> have_nanos.to_numpy() + array([100, 86399999999900, 0]) + + """ + days1 = pc.subtract(self.days, days) + nanos1 = pc.subtract(self.nanos, nanos) + overflows = pc.greater_equal(nanos1, 86400 * 1e9) + underflows = pc.less(nanos1, 0) + mask = pa.StructArray.from_arrays( + [overflows, underflows], names=["overflows", "underflows"] + ) + nanos2 = pc.case_when( + mask, + pc.subtract(nanos1, int(86400 * 1e9)), + pc.add(nanos1, int(86400 * 1e9)), + nanos1, + ) + days2 = pc.case_when( + mask, + pc.add(days1, 1), + pc.subtract(days1, 1), + days1, + ) + return days2, nanos2 + + def difference(self, other: Timestamp) -> tuple[pa.Int64Array, pa.Int64Array]: + """ + Compute the element-wise difference between this timestamp and another. 
+ """ + if self.scale != other.scale: + raise ValueError( + "Cannot compute difference between timestamps with different scales" + ) + days1 = pc.subtract(self.days, other.days) + nanos1 = pc.subtract(self.nanos, other.nanos) + + overflows = pc.greater_equal(nanos1, 86400 * 1e9) + underflows = pc.less(nanos1, 0) + mask = pa.StructArray.from_arrays( + [overflows, underflows], names=["overflows", "underflows"] + ) + nanos2 = pc.case_when( + mask, + pc.subtract(nanos1, int(86400 * 1e9)), + pc.add(nanos1, int(86400 * 1e9)), + nanos1, + ) + days2 = pc.case_when( + mask, + pc.add(days1, 1), + pc.subtract(days1, 1), + days1, + ) + return days2, nanos2 + + +def _duration_arrays_within_tolerance( + delta_days: pa.Int64Array, delta_nanos: pa.Int64Array, max_nanos_deviation: int +) -> pa.BooleanArray: + """Return a boolean array indicating whether the delta_days and delta_nanos + arrays are within the specified tolerance. + + The max_nanos_deviation should be the maximum number of + nanoseconds that the two arrays can deviate to still be + considered 'within tolerance' (the bound is inclusive). + """ + if max_nanos_deviation == 0: + return pc.and_(pc.equal(delta_days, 0), pc.equal(delta_nanos, 0)) + + cond1 = pc.and_( + pc.equal(delta_days, 0), pc.less_equal(pc.abs(delta_nanos), max_nanos_deviation) + ) + cond2 = pc.and_( + pc.equal(delta_days, -1), + pc.greater_equal(pc.abs(delta_nanos), int(86400 * 1e9) - max_nanos_deviation), + ) + return pc.or_(cond1, cond2)