From f1d5bc35bf5d8cc6d5ca01fb2aace29980edfee2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 28 Jul 2025 16:12:46 -0700 Subject: [PATCH 01/14] API: offsets.Day is always calendar-day --- doc/source/whatsnew/v3.0.0.rst | 38 ++++++++ pandas/_libs/tslibs/__init__.py | 2 + pandas/_libs/tslibs/offsets.pyi | 2 +- pandas/_libs/tslibs/offsets.pyx | 92 ++++++++++++++++--- pandas/_libs/tslibs/period.pyx | 7 +- pandas/_libs/tslibs/timedeltas.pyx | 5 + pandas/core/arrays/_ranges.py | 10 +- pandas/core/arrays/datetimelike.py | 34 ++++++- pandas/core/arrays/datetimes.py | 7 +- pandas/core/arrays/period.py | 30 ++++-- pandas/core/arrays/timedeltas.py | 20 ++-- pandas/core/indexes/period.py | 3 +- pandas/core/resample.py | 26 +++--- pandas/tests/arithmetic/test_datetime64.py | 4 + pandas/tests/arithmetic/test_numeric.py | 12 ++- pandas/tests/arithmetic/test_timedelta64.py | 27 +++++- .../datetimes/methods/test_tz_convert.py | 7 ++ .../indexes/datetimes/test_arithmetic.py | 25 +++-- pandas/tests/resample/test_datetime_index.py | 12 ++- pandas/tests/resample/test_resample_api.py | 2 + .../scalar/timedelta/test_constructors.py | 8 +- pandas/tests/tseries/offsets/test_dst.py | 12 ++- pandas/tests/tseries/offsets/test_offsets.py | 7 +- pandas/tests/tseries/offsets/test_ticks.py | 2 +- pandas/tests/tslibs/test_api.py | 1 + pandas/tests/tslibs/test_to_offset.py | 2 +- 26 files changed, 331 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index d1d1b5dc1afef..0ad40e2eb10f7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -297,6 +297,44 @@ This change also applies to :meth:`.DataFrameGroupBy.value_counts`. Here, there df.groupby("a", sort=True).value_counts(sort=False) +.. 
_whatsnew_300.api_breaking.offsets_day_not_a_tick: + +Changed behavior of ``pd.offsets.Day`` to always represent calendar-day +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, :class:`offsets.Day` represented a fixed span +of 24 hours, disregarding Daylight Saving Time transitions. It now consistently +behaves as a calendar-day, preserving time-of-day across DST transitions: + +*Old behavior* + +.. code-block:: ipython + + In [5]: ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern") + In [6]: ts + pd.offsets.Day(1) + Out[6]: Timestamp('2025-03-09 09:00:00-0400', tz='US/Eastern') + +*New behavior* + +.. ipython:: python + + ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern") + ts + pd.offsets.Day(1) + +This change fixes a long-standing bug in ``pd.date_range`` (:issue:`51716`, :issue:`35388`), but causes several +small behavior differences as collateral: + +- ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)`` +- :class:`offsets.Day` no longer supports division +- :class:`Timedelta` no longer accepts :class:`Day` objects as inputs +- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns +a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` +object. +- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` +with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. +- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported. +- Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. + .. 
_whatsnew_300.api_breaking.deps: Increased minimum version for Python diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index f433a3acf356f..4c6bbb87baa2c 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -1,5 +1,6 @@ __all__ = [ "BaseOffset", + "Day", "IncompatibleFrequency", "NaT", "NaTType", @@ -61,6 +62,7 @@ ) from pandas._libs.tslibs.offsets import ( BaseOffset, + Day, Tick, to_offset, ) diff --git a/pandas/_libs/tslibs/offsets.pyi b/pandas/_libs/tslibs/offsets.pyi index a71aa42b4f671..7dab2b469c542 100644 --- a/pandas/_libs/tslibs/offsets.pyi +++ b/pandas/_libs/tslibs/offsets.pyi @@ -116,7 +116,7 @@ class Tick(SingleConstructorOffset): def delta_to_tick(delta: timedelta) -> Tick: ... -class Day(Tick): ... +class Day(BaseOffset): ... class Hour(Tick): ... class Minute(Tick): ... class Second(Tick): ... diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 87214c3758d5c..b1bb5b1d27eb7 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1023,8 +1023,6 @@ cdef class Tick(SingleConstructorOffset): # Note: Without making this cpdef, we get AttributeError when calling # from __mul__ cpdef Tick _next_higher_resolution(Tick self): - if type(self) is Day: - return Hour(self.n * 24) if type(self) is Hour: return Minute(self.n * 60) if type(self) is Minute: @@ -1173,7 +1171,7 @@ cdef class Tick(SingleConstructorOffset): self.normalize = False -cdef class Day(Tick): +cdef class Day(SingleConstructorOffset): """ Offset ``n`` days. 
@@ -1203,11 +1201,73 @@ cdef class Day(Tick): >>> ts + Day(-4) Timestamp('2022-12-05 15:00:00') """ + _adjust_dst = True + _attributes = tuple(["n", "normalize"]) _nanos_inc = 24 * 3600 * 1_000_000_000 _prefix = "D" _period_dtype_code = PeriodDtypeCode.D _creso = NPY_DATETIMEUNIT.NPY_FR_D + def __init__(self, n=1, normalize=False): + BaseOffset.__init__(self, n) + if normalize: + # GH#21427 + raise ValueError( + "Day offset with `normalize=True` are not allowed." + ) + + def is_on_offset(self, dt) -> bool: + return True + + @apply_wraps + def _apply(self, other): + if isinstance(other, Day): + # TODO: why isn't this handled in __add__? + return Day(self.n + other.n) + return other + np.timedelta64(self.n, "D") + + def _apply_array(self, dtarr): + return dtarr + np.timedelta64(self.n, "D") + + @cache_readonly + def freqstr(self) -> str: + """ + Return a string representing the frequency. + + Examples + -------- + >>> pd.Day(5).freqstr + '5D' + + >>> pd.offsets.Day(1).freqstr + 'D' + """ + if self.n != 1: + return str(self.n) + "D" + return "D" + + # Having this here isn't strictly-correct post-GH#??? + # but this gets called in timedelta.get_unit_for_round in cases where + # Day unambiguously means 24h. + @property + def nanos(self) -> int64_t: + """ + Returns an integer of the total number of nanoseconds. + + See Also + -------- + tseries.offsets.Hour.nanos : + Returns an integer of the total number of nanoseconds. + tseries.offsets.Day.nanos : + Returns an integer of the total number of nanoseconds. 
+ + Examples + -------- + >>> pd.offsets.Hour(5).nanos + 18000000000000 + """ + return self.n * self._nanos_inc + cdef class Hour(Tick): """ @@ -1431,16 +1491,13 @@ cdef class Nano(Tick): def delta_to_tick(delta: timedelta) -> Tick: if delta.microseconds == 0 and getattr(delta, "nanoseconds", 0) == 0: # nanoseconds only for pd.Timedelta - if delta.seconds == 0: - return Day(delta.days) + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(seconds / 3600) + elif seconds % 60 == 0: + return Minute(seconds / 60) else: - seconds = delta.days * 86400 + delta.seconds - if seconds % 3600 == 0: - return Hour(seconds / 3600) - elif seconds % 60 == 0: - return Minute(seconds / 60) - else: - return Second(seconds) + return Second(seconds) else: nanos = delta_to_nanoseconds(delta) if nanos % 1_000_000 == 0: @@ -5332,6 +5389,17 @@ cpdef to_offset(freq, bint is_period=False): raise ValueError(INVALID_FREQ_ERR_MSG.format( f"{freq}, failed to parse with error message: {repr(err)}") ) from err + + # TODO(3.0?) once deprecation of "d" is enforced, the check for it here + # can be removed + if ( + isinstance(result, Hour) + and result.n % 24 == 0 + and ("d" in freq or "D" in freq) + ): + # Since Day is no longer a Tick, delta_to_tick returns Hour above, + # so we convert back here. + result = Day(result.n // 24) else: result = None diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index df5c17745b8a4..22c1b98bc54bb 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -113,6 +113,7 @@ from pandas._libs.tslibs.offsets cimport ( from pandas._libs.tslibs.offsets import ( INVALID_FREQ_ERR_MSG, BDay, + Day, ) from pandas.util._decorators import set_module @@ -1825,6 +1826,10 @@ cdef class _Period(PeriodMixin): # i.e. 
np.timedelta64("nat") return NaT + if isinstance(other, Day): + # Periods are timezone-naive, so we treat Day as Tick-like + other = np.timedelta64(other.n, "D") + try: inc = delta_to_nanoseconds(other, reso=self._dtype._creso, round_ok=False) except ValueError as err: @@ -1846,7 +1851,7 @@ cdef class _Period(PeriodMixin): @cython.overflowcheck(True) def __add__(self, other): - if is_any_td_scalar(other): + if is_any_td_scalar(other) or isinstance(other, Day): return self._add_timedeltalike_scalar(other) elif is_offset_object(other): return self._add_offset(other) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 6c76e05471577..5310165290d65 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -78,6 +78,7 @@ from pandas._libs.tslibs.np_datetime import ( ) from pandas._libs.tslibs.offsets cimport is_tick_object +from pandas._libs.tslibs.offsets import Day from pandas._libs.tslibs.util cimport ( is_array, is_float_object, @@ -2577,4 +2578,8 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? 
-1: freq = to_offset(freq) freq.nanos # raises on non-fixed freq + if isinstance(freq, Day): + # In the "round" context, Day unambiguously means 24h, not calendar-day + freq = Timedelta(days=freq.n) + return delta_to_nanoseconds(freq, creso) diff --git a/pandas/core/arrays/_ranges.py b/pandas/core/arrays/_ranges.py index 88f5ac4ebdea4..9d491220c9899 100644 --- a/pandas/core/arrays/_ranges.py +++ b/pandas/core/arrays/_ranges.py @@ -12,6 +12,7 @@ from pandas._libs.lib import i8max from pandas._libs.tslibs import ( BaseOffset, + Day, OutOfBoundsDatetime, Timedelta, Timestamp, @@ -55,8 +56,13 @@ def generate_regular_range( """ istart = start._value if start is not None else None iend = end._value if end is not None else None - freq.nanos # raises if non-fixed frequency - td = Timedelta(freq) + if isinstance(freq, Day): + # In contexts without a timezone, a Day offset is unambiguously + # interpretable as Timedelta-like. + td = Timedelta(days=freq.n) + else: + freq.nanos # raises if non-fixed frequency + td = Timedelta(freq) b: int e: int try: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index d5e654c95577e..bb2b5d99b0a01 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -29,6 +29,7 @@ ) from pandas._libs.tslibs import ( BaseOffset, + Day, IncompatibleFrequency, NaT, NaTType, @@ -44,6 +45,7 @@ ints_to_pydatetime, ints_to_pytimedelta, periods_per_day, + timezones, to_offset, ) from pandas._libs.tslibs.fields import ( @@ -1068,6 +1070,26 @@ def _get_arithmetic_result_freq(self, other) -> BaseOffset | None: elif isinstance(self.freq, Tick): # In these cases return self.freq + elif self.dtype.kind == "m" and isinstance(other, Timedelta): + return self.freq + elif ( + self.dtype.kind == "m" + and isinstance(other, Timestamp) + and (other.tz is None or timezones.is_utc(other.tz)) + ): + # e.g. test_td64arr_add_sub_datetimelike_scalar tdarr + timestamp + # gives a DatetimeArray. 
As long as the timestamp has no timezone + # or UTC, the result can retain a Day freq. + return self.freq + elif ( + lib.is_np_dtype(self.dtype, "M") + and isinstance(self.freq, Day) + and isinstance(other, Timedelta) + ): + # e.g. TestTimedelta64ArithmeticUnsorted::test_timedelta + # Day is unambiguously 24h + return self.freq + return None @final @@ -1358,6 +1380,10 @@ def __add__(self, other): result: np.ndarray | DatetimeLikeArrayMixin = self._add_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_timedeltalike_scalar(other) + elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"): + # We treat this as Tick-like + td = Timedelta(days=other.n).as_unit("s") + result = self._add_timedeltalike_scalar(td) elif isinstance(other, BaseOffset): # specifically _not_ a Tick result = self._add_offset(other) @@ -1418,6 +1444,10 @@ def __sub__(self, other): result: np.ndarray | DatetimeLikeArrayMixin = self._sub_nat() elif isinstance(other, (Tick, timedelta, np.timedelta64)): result = self._add_timedeltalike_scalar(-other) + elif isinstance(other, Day) and lib.is_np_dtype(self.dtype, "Mm"): + # We treat this as Tick-like + td = Timedelta(days=other.n).as_unit("s") + result = self._add_timedeltalike_scalar(-td) elif isinstance(other, BaseOffset): # specifically _not_ a Tick result = self._add_offset(-other) @@ -1982,7 +2012,7 @@ def freq(self, value) -> None: if value is not None: value = to_offset(value) self._validate_frequency(self, value) - if self.dtype.kind == "m" and not isinstance(value, Tick): + if self.dtype.kind == "m" and not isinstance(value, (Tick, Day)): raise TypeError("TimedeltaArray/Index freq must be a Tick") if self.ndim > 1: @@ -2279,7 +2309,7 @@ def _with_freq(self, freq) -> Self: pass elif len(self) == 0 and isinstance(freq, BaseOffset): # Always valid. 
In the TimedeltaArray case, we require a Tick offset - if self.dtype.kind == "m" and not isinstance(freq, Tick): + if self.dtype.kind == "m" and not isinstance(freq, (Tick, Day)): raise TypeError("TimedeltaArray/Index freq must be a Tick") else: # As an internal method, we can ensure this assertion always holds diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 57c138d9828bd..3866470809d7b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -474,7 +474,7 @@ def _generate_range( if end is not None: end = end.tz_localize(None) - if isinstance(freq, Tick): + if isinstance(freq, (Tick, Day)): i8values = generate_regular_range(start, end, periods, freq, unit=unit) else: xdr = _generate_range( @@ -928,7 +928,10 @@ def tz_convert(self, tz) -> Self: # No conversion since timestamps are all UTC to begin with dtype = tz_to_dtype(tz, unit=self.unit) - return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) + new_freq = None + if isinstance(self.freq, Tick): + new_freq = self.freq + return self._simple_new(self._ndarray, dtype=dtype, freq=new_freq) @dtl.ravel_compat def tz_localize( diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index ae92e17332c76..a6b8dab00b1a9 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -21,6 +21,7 @@ from pandas._libs.arrays import NDArrayBacked from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, NaTType, Timedelta, @@ -838,7 +839,16 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: # TODO: other cases? 
return dta else: - return dta._with_freq("infer") + dta = dta._with_freq("infer") + if freq is not None: + freq = to_offset(freq) + if ( + isinstance(dta.freq, Day) + and not isinstance(freq, Day) + and Timedelta(freq) == Timedelta(days=dta.freq.n) + ): + dta._freq = freq + return dta # -------------------------------------------------------------------- @@ -1014,6 +1024,9 @@ def _addsub_int_array_or_scalar( def _add_offset(self, other: BaseOffset): assert not isinstance(other, Tick) + if isinstance(other, Day): + return self + np.timedelta64(other.n, "D") + self._require_matching_freq(other, base=True) return self._addsub_int_array_or_scalar(other.n, operator.add) @@ -1028,7 +1041,7 @@ def _add_timedeltalike_scalar(self, other): ------- PeriodArray """ - if not isinstance(self.freq, Tick): + if not isinstance(self.freq, (Tick, Day)): # We cannot add timedelta-like to non-tick PeriodArray raise raise_on_incompatible(self, other) @@ -1036,7 +1049,10 @@ def _add_timedeltalike_scalar(self, other): # i.e. 
np.timedelta64("NaT") return super()._add_timedeltalike_scalar(other) - td = np.asarray(Timedelta(other).asm8) + if isinstance(other, Day): + td = np.asarray(Timedelta(days=other.n).asm8) + else: + td = np.asarray(Timedelta(other).asm8) return self._add_timedelta_arraylike(td) def _add_timedelta_arraylike( @@ -1457,8 +1473,10 @@ def _make_field_arrays(*fields) -> list[np.ndarray]: # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int, # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]" return [ - np.asarray(x) - if isinstance(x, (np.ndarray, list, ABCSeries)) - else np.repeat(x, length) # type: ignore[arg-type] + ( + np.asarray(x) + if isinstance(x, (np.ndarray, list, ABCSeries)) + else np.repeat(x, length) + ) # type: ignore[arg-type] for x in fields ] diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 9012b9f36348a..d90fe812f0e7c 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -14,6 +14,7 @@ tslibs, ) from pandas._libs.tslibs import ( + Day, NaT, NaTType, Tick, @@ -23,6 +24,7 @@ iNaT, is_supported_dtype, periods_per_second, + to_offset, ) from pandas._libs.tslibs.conversion import cast_from_unit_vectorized from pandas._libs.tslibs.fields import ( @@ -216,7 +218,7 @@ def _validate_dtype(cls, values, dtype): def _simple_new( # type: ignore[override] cls, values: npt.NDArray[np.timedelta64], - freq: Tick | None = None, + freq: Tick | Day | None = None, dtype: np.dtype[np.timedelta64] = TD64NS_DTYPE, ) -> Self: # Require td64 dtype, not unit-less, matching values.dtype @@ -224,7 +226,7 @@ def _simple_new( # type: ignore[override] assert not tslibs.is_unitless(dtype) assert isinstance(values, np.ndarray), type(values) assert dtype == values.dtype - assert freq is None or isinstance(freq, Tick) + assert freq is None or isinstance(freq, (Tick, Day)) result = super()._simple_new(values=values, dtype=dtype) result._freq = freq @@ -462,7 +464,7 @@ 
def _format_native_types( # Arithmetic Methods def _add_offset(self, other): - assert not isinstance(other, Tick) + assert not isinstance(other, (Tick, Day)) raise TypeError( f"cannot add the type {type(other).__name__} to a {type(self).__name__}" ) @@ -544,7 +546,13 @@ def _scalar_divlike_op(self, other, op): if self.freq is not None: # Note: freq gets division, not floor-division, even if op # is floordiv. - freq = self.freq / other + if isinstance(self.freq, Day): + if self.freq.n % other == 0: + freq = Day(self.freq.n // other) + else: + freq = to_offset(Timedelta(days=self.freq.n)) / other + else: + freq = self.freq / other if freq.nanos == 0 and self.freq.nanos != 0: # e.g. if self.freq is Nano(1) then dividing by 2 # rounds down to zero @@ -1053,7 +1061,7 @@ def sequence_to_td64ns( copy: bool = False, unit=None, errors: DateTimeErrorChoices = "raise", -) -> tuple[np.ndarray, Tick | None]: +) -> tuple[np.ndarray, Tick | Day | None]: """ Parameters ---------- @@ -1071,7 +1079,7 @@ def sequence_to_td64ns( ------- converted : numpy.ndarray The sequence converted to a numpy array with dtype ``timedelta64[ns]``. - inferred_freq : Tick or None + inferred_freq : Tick, Day, or None The inferred frequency of the sequence. Raises diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 0a7a0319bed3a..d9f5d2e59eb4f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -11,6 +11,7 @@ from pandas._libs import index as libindex from pandas._libs.tslibs import ( BaseOffset, + Day, NaT, Period, Resolution, @@ -367,7 +368,7 @@ def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]: of self.freq. Note IncompatibleFrequency subclasses ValueError. 
""" if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)): - if isinstance(self.freq, Tick): + if isinstance(self.freq, (Tick, Day)): # _check_timedeltalike_freq_compat will raise if incompatible delta = self._data._check_timedeltalike_freq_compat(other) return delta diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 08e3beef99e60..128426131e453 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2159,8 +2159,10 @@ def __init__( fill_method=None, limit: int | None = None, convention: Literal["start", "end", "e", "s"] | None = None, - origin: Literal["epoch", "start", "start_day", "end", "end_day"] - | TimestampConvertibleTypes = "start_day", + origin: ( + Literal["epoch", "start", "start_day", "end", "end_day"] + | TimestampConvertibleTypes + ) = "start_day", offset: TimedeltaConvertibleTypes | None = None, group_keys: bool = False, **kwargs, @@ -2174,13 +2176,10 @@ def __init__( if convention not in {None, "start", "end", "e", "s"}: raise ValueError(f"Unsupported value {convention} for `convention`") - if ( - (key is None and obj is not None and isinstance(obj.index, PeriodIndex)) # type: ignore[attr-defined] - or ( - key is not None - and obj is not None - and getattr(obj[key], "dtype", None) == "period" # type: ignore[index] - ) + if (key is None and obj is not None and isinstance(obj.index, PeriodIndex)) or ( # type: ignore[attr-defined] + key is not None + and obj is not None + and getattr(obj[key], "dtype", None) == "period" # type: ignore[index] ): freq = to_offset(freq, is_period=True) else: @@ -2421,7 +2420,7 @@ def _get_time_delta_bins(self, ax: TimedeltaIndex): f"an instance of {type(ax).__name__}" ) - if not isinstance(self.freq, Tick): + if not isinstance(self.freq, (Tick, Day)): # GH#51896 raise ValueError( "Resampling on a TimedeltaIndex requires fixed-duration `freq`, " @@ -2632,7 +2631,7 @@ def _get_timestamp_range_edges( ------- A tuple of length 2, containing the adjusted pd.Timestamp objects. 
""" - if isinstance(freq, Tick): + if isinstance(freq, (Tick, Day)): index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") @@ -2763,7 +2762,10 @@ def _adjust_dates_anchored( if offset is not None: offset = offset.as_unit(unit) - freq_value = Timedelta(freq).as_unit(unit)._value + if isinstance(freq, Day): + freq_value = Timedelta(days=freq.n).as_unit(unit)._value + else: + freq_value = Timedelta(freq).as_unit(unit)._value origin_timestamp = 0 # origin == "epoch" if origin == "start_day": diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 9251841bdb82f..52943f4e10148 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -835,6 +835,8 @@ def test_dt64arr_add_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("2000-01-01 02:00", "2000-02-01 02:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -855,6 +857,8 @@ def test_dt64arr_sub_timedeltalike_scalar( rng = date_range("2000-01-01", "2000-02-01", tz=tz) expected = date_range("1999-12-31 22:00", "2000-01-31 22:00", tz=tz) + if tz is not None: + expected = expected._with_freq(None) rng = tm.box_expected(rng, box_with_array) expected = tm.box_expected(expected, box_with_array) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d205569270705..9fc148f492e48 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -290,10 +290,16 @@ def test_numeric_arr_rdiv_tdscalar(self, three_days, numeric_idx, box_with_array index = tm.box_expected(index, box) expected = tm.box_expected(expected, box) - result = three_days / index - tm.assert_equal(result, 
expected) + if isinstance(three_days, pd.offsets.Day): + # GH#41943 Day is no longer timedelta-like + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + three_days / index + else: + result = three_days / index + tm.assert_equal(result, expected) + msg = "cannot use operands with types dtype" - msg = "cannot use operands with types dtype" with pytest.raises(TypeError, match=msg): index / three_days diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 642420713aeba..a05dbfc3e57d1 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -8,6 +8,7 @@ import numpy as np import pytest +from pandas._libs.tslibs import timezones from pandas.compat import WASM from pandas.errors import OutOfBoundsDatetime @@ -1001,13 +1002,17 @@ def test_td64arr_add_sub_datetimelike_scalar( ts = dt_scalar.to_pydatetime() elif cls is np.datetime64: if tz_naive_fixture is not None: - pytest.skip(f"{cls} doesn support {tz_naive_fixture}") + pytest.skip(f"{cls} doesn't support {tz_naive_fixture}") ts = dt_scalar.to_datetime64() else: ts = dt_scalar tdi = timedelta_range("1 day", periods=3) expected = pd.date_range("2012-01-02", periods=3, tz=tz) + if tz is not None and not timezones.is_utc(expected.tz): + # Day is no longer preserved by timedelta add/sub in pandas3 because + # it represents Calendar-Day instead of 24h + expected = expected._with_freq(None) tdarr = tm.box_expected(tdi, box_with_array) expected = tm.box_expected(expected, box_with_array) @@ -1016,6 +1021,10 @@ def test_td64arr_add_sub_datetimelike_scalar( tm.assert_equal(tdarr + ts, expected) expected2 = pd.date_range("2011-12-31", periods=3, freq="-1D", tz=tz) + if tz is not None and not timezones.is_utc(expected2.tz): + # Day is no longer preserved by timedelta add/sub in pandas3 because + # it represents Calendar-Day instead of 24h + expected2 = expected2._with_freq(None) expected2 = 
tm.box_expected(expected2, box_with_array) tm.assert_equal(ts - tdarr, expected2) @@ -1828,6 +1837,16 @@ def test_td64arr_mod_tdscalar( expected = TimedeltaIndex(["1 Day", "2 Days", "0 Days"] * 3) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, offsets.Day): + msg = "unsupported operand type" + with pytest.raises(TypeError, match=msg): + tdarr % three_days + with pytest.raises(TypeError, match=msg): + divmod(tdarr, three_days) + with pytest.raises(TypeError, match=msg): + tdarr // three_days + return + result = tdarr % three_days tm.assert_equal(result, expected) @@ -1871,6 +1890,12 @@ def test_td64arr_rmod_tdscalar(self, box_with_array, three_days): expected = TimedeltaIndex(expected) expected = tm.box_expected(expected, box_with_array) + if isinstance(three_days, offsets.Day): + msg = "Cannot divide Day by TimedeltaArray" + with pytest.raises(TypeError, match=msg): + three_days % tdarr + return + result = three_days % tdarr tm.assert_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/methods/test_tz_convert.py b/pandas/tests/indexes/datetimes/methods/test_tz_convert.py index 9eabb742b93a4..20dc35f7f8d49 100644 --- a/pandas/tests/indexes/datetimes/methods/test_tz_convert.py +++ b/pandas/tests/indexes/datetimes/methods/test_tz_convert.py @@ -283,3 +283,10 @@ def test_tz_convert_unsorted(self, tzstr): result = dr[::-1].hour exp = dr.hour[::-1] tm.assert_almost_equal(result, exp) + + def test_dti_tz_convert_day_freq_not_preserved(self): + # GH#51716 + dti = date_range("2020-3-28", periods=5, freq="D", tz="Europe/London") + result = dti.tz_convert("UTC") + assert (result == dti).all() + assert result.freq is None diff --git a/pandas/tests/indexes/datetimes/test_arithmetic.py b/pandas/tests/indexes/datetimes/test_arithmetic.py index bac849301d1f7..ba9f82f1064ca 100644 --- a/pandas/tests/indexes/datetimes/test_arithmetic.py +++ b/pandas/tests/indexes/datetimes/test_arithmetic.py @@ -1,13 +1,13 @@ # Arithmetic tests 
specific to DatetimeIndex are generally about `freq` # retention or inference. Other arithmetic tests belong in # tests/arithmetic/test_datetime64.py -import pytest from pandas import ( Timedelta, TimedeltaIndex, Timestamp, date_range, + offsets, timedelta_range, ) import pandas._testing as tm @@ -16,28 +16,30 @@ class TestDatetimeIndexArithmetic: def test_add_timedelta_preserves_freq(self): # GH#37295 should hold for any DTI with freq=None or Tick freq + # GH#41943 as of pandas3 Tick does not include Day tz = "Canada/Eastern" dti = date_range( start=Timestamp("2019-03-26 00:00:00-0400", tz=tz), end=Timestamp("2020-10-17 00:00:00-0400", tz=tz), - freq="D", + freq="12h", ) result = dti + Timedelta(days=1) assert result.freq == dti.freq def test_sub_datetime_preserves_freq(self, tz_naive_fixture): # GH#48818 - dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture) + # In pandas3 "D" preserves time-of-day across DST transitions, so + # is not preserved by subtraction. Ticks offsets like "24h" + # are still preserved + dti = date_range( + "2016-01-01", periods=12, tz=tz_naive_fixture, freq=offsets.Hour(24) + ) res = dti - dti[0] - expected = timedelta_range("0 Days", "11 Days") + expected = timedelta_range("0 Days", "11 Days", freq=offsets.Hour(24)) tm.assert_index_equal(res, expected) assert res.freq == expected.freq - @pytest.mark.xfail( - reason="The inherited freq is incorrect bc dti.freq is incorrect " - "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461" - ) def test_sub_datetime_preserves_freq_across_dst(self): # GH#48818 ts = Timestamp("2016-03-11", tz="US/Pacific") @@ -54,3 +56,10 @@ def test_sub_datetime_preserves_freq_across_dst(self): ) tm.assert_index_equal(res, expected) assert res.freq == expected.freq + + def test_add_dti_day(self): + # GH#35388 + dti = date_range("2020-03-28", periods=4, freq="D", tz="Europe/Berlin") + result = (dti + dti.freq)[:-1] + expected = dti[1:] + tm.assert_index_equal(result, expected) diff --git 
a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index f871c0bf0218c..f6719202db4d4 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -6,6 +6,7 @@ import pytest from pandas._libs import lib +from pandas._libs.tslibs import Day from pandas._typing import DatetimeNaTType from pandas.compat import is_platform_windows import pandas.util._test_decorators as td @@ -33,6 +34,7 @@ ) from pandas.tseries import offsets +from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import Minute @@ -884,7 +886,8 @@ def test_resample_origin_epoch_with_tz_day_vs_24h(unit): result_1 = ts_1.resample("D", origin="epoch").mean() result_2 = ts_1.resample("24h", origin="epoch").mean() - tm.assert_series_equal(result_1, result_2) + tm.assert_series_equal(result_1, result_2, check_freq=False) + # GH#41943 check_freq=False bc Day and Hour(24) no longer compare as equal # check that we have the same behavior with epoch even if we are not timezone aware ts_no_tz = ts_1.tz_localize(None) @@ -1845,6 +1848,10 @@ def test_resample_equivalent_offsets(n1, freq1, n2, freq2, k, unit): result1 = ser.resample(str(n1_) + freq1).mean() result2 = ser.resample(str(n2_) + freq2).mean() + if freq2 == "D" and isinstance(result2.index.freq, Day): + # GH#55502 Day is no longer a Tick so no longer compares as equivalent, + # but the actual values we expect should still match + result2.index.freq = to_offset(Timedelta(days=result2.index.freq.n)) tm.assert_series_equal(result1, result2) @@ -2059,7 +2066,8 @@ def test_resample_depr_lowercase_frequency(freq, freq_depr, data): exp_dti = DatetimeIndex(data=data, dtype="datetime64[ns]", freq=freq) expected = Series(2.0, index=exp_dti) - tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected, check_freq=False) + # GH#41943 check_freq=False bc 24H and D no longer compare as equal def test_resample_ms_closed_right(unit): 
diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index da1774cf22587..845b5ad7acc00 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -5,6 +5,7 @@ import pytest from pandas._libs import lib +from pandas._libs.tslibs import Day import pandas as pd from pandas import ( @@ -751,6 +752,7 @@ def test_resample_agg_readonly(): rs = ser.resample("1D") expected = Series([pd.Timestamp(0), pd.Timestamp(0)], index=index[::24]) + expected.index.freq = Day(1) # GH#41943 no longer equivalent to 24h result = rs.agg("last") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index 45caeb1733590..6b70049c64bfe 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -253,7 +253,13 @@ def test_from_tick_reso(): assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value tick = offsets.Day() - assert Timedelta(tick)._creso == NpyDatetimeUnit.NPY_FR_s.value + msg = ( + "Value must be Timedelta, string, integer, float, timedelta " + "or convertible, not Day" + ) + with pytest.raises(ValueError, match=msg): + # GH#41943 Day is no longer a Tick + Timedelta(tick) def test_construction(): diff --git a/pandas/tests/tseries/offsets/test_dst.py b/pandas/tests/tseries/offsets/test_dst.py index e75958843040d..e87b3757bb92d 100644 --- a/pandas/tests/tseries/offsets/test_dst.py +++ b/pandas/tests/tseries/offsets/test_dst.py @@ -214,7 +214,7 @@ def test_springforward_singular(self, performance_warning): QuarterEnd: ["11/2/2012", "12/31/2012"], BQuarterBegin: ["11/2/2012", "12/3/2012"], BQuarterEnd: ["11/2/2012", "12/31/2012"], - Day: ["11/4/2012", "11/4/2012 23:00"], + Day: ["11/4/2012", "11/5/2012"], }.items() @pytest.mark.parametrize("tup", offset_classes) @@ -263,3 +263,13 @@ def 
test_nontick_offset_with_ambiguous_time_error(original_dt, target_dt, offset msg = f"Cannot infer dst time from {target_dt}, try using the 'ambiguous' argument" with pytest.raises(ValueError, match=msg): localized_dt + offset + + +def test_add_day_nonexistent_raises(): + # https://github.com/pandas-dev/pandas/issues/41943#issuecomment-1466978225 + + ts = Timestamp("2015-03-28T2:30", tz="Europe/Warsaw") + + msg = "is a nonexistent time due to daylight savings time" + with pytest.raises(ValueError, match=msg): + ts + Day(1) diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index 0b2e66a2b3a0d..26b182fb4e9b1 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -47,6 +47,7 @@ CustomBusinessMonthBegin, CustomBusinessMonthEnd, DateOffset, + Day, Easter, FY5253Quarter, LastWeekOfMonth, @@ -244,7 +245,7 @@ def test_offset_freqstr(self, offset_types): assert offset.rule_code == code def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=False): - if normalize and issubclass(offset, Tick): + if normalize and issubclass(offset, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return @@ -464,7 +465,7 @@ def test_is_on_offset(self, offset_types, expecteds): assert offset_s.is_on_offset(dt) # when normalize=True, is_on_offset checks time is 00:00:00 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): # normalize=True disallowed for Tick subclasses GH#21427 return offset_n = _create_offset(offset_types, normalize=True) @@ -496,7 +497,7 @@ def test_add(self, offset_types, tz_naive_fixture, expecteds): assert result == expected_localize # normalize=True, disallowed for Tick subclasses GH#21427 - if issubclass(offset_types, Tick): + if issubclass(offset_types, (Tick, Day)): return offset_s = _create_offset(offset_types, normalize=True) expected = Timestamp(expected.date()) diff --git 
a/pandas/tests/tseries/offsets/test_ticks.py b/pandas/tests/tseries/offsets/test_ticks.py index 46b6846ad1ec2..f1065690233af 100644 --- a/pandas/tests/tseries/offsets/test_ticks.py +++ b/pandas/tests/tseries/offsets/test_ticks.py @@ -54,7 +54,7 @@ def test_delta_to_tick(): delta = timedelta(3) tick = delta_to_tick(delta) - assert tick == offsets.Day(3) + assert tick == offsets.Hour(72) td = Timedelta(nanoseconds=5) tick = delta_to_tick(td) diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 42d055326c2a5..89835ff4b7694 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -29,6 +29,7 @@ def test_namespace(): "NaTType", "iNaT", "nat_strings", + "Day", "OutOfBoundsDatetime", "OutOfBoundsTimedelta", "Period", diff --git a/pandas/tests/tslibs/test_to_offset.py b/pandas/tests/tslibs/test_to_offset.py index add9213ae59fb..f3d323f315002 100644 --- a/pandas/tests/tslibs/test_to_offset.py +++ b/pandas/tests/tslibs/test_to_offset.py @@ -147,7 +147,7 @@ def test_to_offset_leading_plus(freqstr, expected, wrn): ({"days": -1, "seconds": 1}, offsets.Second(-86399)), ({"hours": 1, "minutes": 10}, offsets.Minute(70)), ({"hours": 1, "minutes": -10}, offsets.Minute(50)), - ({"weeks": 1}, offsets.Day(7)), + ({"weeks": 1}, offsets.Hour(168)), ({"hours": 1}, offsets.Hour(1)), ({"hours": 1}, to_offset("60min")), ({"microseconds": 1}, offsets.Micro(1)), From 270b859dd177bc23e916a37b2a9643db684504d5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 07:34:38 -0700 Subject: [PATCH 02/14] mypy fixup --- pandas/core/arrays/period.py | 4 ++-- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/resample.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index a6b8dab00b1a9..7b2a0e9d9e39e 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1476,7 +1476,7 @@ def _make_field_arrays(*fields) -> list[np.ndarray]: ( 
np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries)) - else np.repeat(x, length) - ) # type: ignore[arg-type] + else np.repeat(x, length) # type: ignore[arg-type] + ) for x in fields ] diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index d90fe812f0e7c..f9ad907e42cf1 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -553,7 +553,7 @@ def _scalar_divlike_op(self, other, op): freq = to_offset(Timedelta(days=self.freq.n)) / other else: freq = self.freq / other - if freq.nanos == 0 and self.freq.nanos != 0: + if freq.nanos == 0 and self.freq.nanos != 0: # type: ignore[union-attr] # e.g. if self.freq is Nano(1) then dividing by 2 # rounds down to zero freq = None diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 128426131e453..f5840f4d4699d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2746,7 +2746,7 @@ def _insert_nat_bin( def _adjust_dates_anchored( first: Timestamp, last: Timestamp, - freq: Tick, + freq: Tick | Day, closed: Literal["right", "left"] = "right", origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, From 905bcff688007a11523a506213ca1c275e74ff5c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 08:45:26 -0700 Subject: [PATCH 03/14] mypy, doc fixup --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/resample.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0ad40e2eb10f7..2c9ade4cafbfa 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -335,6 +335,7 @@ with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracing a :class:`Day` with a :class:`Timedelta` is no longer supported. - Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. + .. 
_whatsnew_300.api_breaking.deps: Increased minimum version for Python diff --git a/pandas/core/resample.py b/pandas/core/resample.py index f5840f4d4699d..01bdaaf9b76a9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2650,7 +2650,13 @@ def _get_timestamp_range_edges( origin = origin.tz_localize(None) first, last = _adjust_dates_anchored( - first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit + first, + last, + cast("Day | Tick", freq), + closed=closed, + origin=origin, + offset=offset, + unit=unit, ) if isinstance(freq, Day): first = first.tz_localize(index_tz) From 6f46e8dd8258ffa078a0e18e9779bfffe96cbc0a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 09:00:26 -0700 Subject: [PATCH 04/14] troubleshoot docbuild --- doc/source/whatsnew/v3.0.0.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2c9ade4cafbfa..0ad40e2eb10f7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -335,7 +335,6 @@ with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracing a :class:`Day` with a :class:`Timedelta` is no longer supported. - Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. - .. 
_whatsnew_300.api_breaking.deps: Increased minimum version for Python From 6b24e8f5d3b62c22d53dfd47891124cff18e191a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 09:22:05 -0700 Subject: [PATCH 05/14] lint ignore --- ci/code_checks.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index c710b7d34e9c5..cebb9cda1e480 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -163,6 +163,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.tseries.offsets.DateOffset.is_on_offset GL08" \ -i "pandas.tseries.offsets.DateOffset.n GL08" \ -i "pandas.tseries.offsets.DateOffset.normalize GL08" \ + -i "pandas.tseries.offsets.Day.freqstr SA01" \ -i "pandas.tseries.offsets.Day.is_on_offset GL08" \ -i "pandas.tseries.offsets.Day.n GL08" \ -i "pandas.tseries.offsets.Day.normalize GL08" \ From 39e0c2dc9255646866a75ec4b62845e52759e14d Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 10:14:48 -0700 Subject: [PATCH 06/14] un-split line --- doc/source/whatsnew/v3.0.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0ad40e2eb10f7..b9e281810d68b 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -328,8 +328,7 @@ small behavior differences as collateral: - :class:`offsets.Day` no longer supports division - :class:`Timedelta` no longer accepts :class:`Day` objects as inputs - :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns -a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` -object. +a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. - Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracing a :class:`Day` with a :class:`Timedelta` is no longer supported. 
From 477f5aa894c15a722d1b4ceb7516ecc6010b4e22 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 12:30:21 -0700 Subject: [PATCH 07/14] Avoid passing Day to Timedelta in docs --- doc/source/user_guide/timedeltas.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 01df17bac5fd7..15b6de4b6a054 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -53,7 +53,7 @@ You can construct a ``Timedelta`` scalar through various arguments, including `I pd.Timedelta("P0DT0H1M0S") pd.Timedelta("P0DT0H0M0.000000123S") -:ref:`DateOffsets` (``Day, Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. +:ref:`DateOffsets` (``Hour, Minute, Second, Milli, Micro, Nano``) can also be used in construction. .. ipython:: python @@ -63,7 +63,7 @@ Further, operations among the scalars yield another scalar ``Timedelta``. .. ipython:: python - pd.Timedelta(pd.offsets.Day(2)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( + pd.Timedelta(pd.offsets.Hour(48)) + pd.Timedelta(pd.offsets.Second(2)) + pd.Timedelta( "00:00:00.000123" ) From 59d3e79eba090fac9286b98e349b9699fd323825 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 13:56:02 -0700 Subject: [PATCH 08/14] extra newline --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b9e281810d68b..f5f1a158cf53c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -334,6 +334,7 @@ with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracing a :class:`Day` with a :class:`Timedelta` is no longer supported. - Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. + .. 
_whatsnew_300.api_breaking.deps: Increased minimum version for Python From 3a8cd5636afca0c7dd111b97bfa5f4a4172834e4 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 14:57:07 -0700 Subject: [PATCH 09/14] typo fixup --- doc/source/whatsnew/v3.0.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index f5f1a158cf53c..e1494dfe56bc7 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -331,10 +331,9 @@ small behavior differences as collateral: a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. - Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. -- Adding or subtracing a :class:`Day` with a :class:`Timedelta` is no longer supported. +- Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported. - Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. - .. 
_whatsnew_300.api_breaking.deps: Increased minimum version for Python From 13e3ffb5d298a4e1f5dfabbe71db1d4341613a6e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Jul 2025 14:57:40 -0700 Subject: [PATCH 10/14] whitespace troubleshoot --- doc/source/whatsnew/v3.0.0.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e1494dfe56bc7..214b5485cdf07 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -327,10 +327,8 @@ small behavior differences as collateral: - ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)`` - :class:`offsets.Day` no longer supports division - :class:`Timedelta` no longer accepts :class:`Day` objects as inputs -- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns -a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. -- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` -with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. +- :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. +- Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported. - Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. 
From c744b3d5bd66f80619f2cf6323cee5d783bbb19f Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jul 2025 09:55:45 -0700 Subject: [PATCH 11/14] suggested edits --- doc/source/whatsnew/v3.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 214b5485cdf07..28b5ddf8281cb 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -321,7 +321,7 @@ behaves as a calendar-day, preserving time-of-day across DST transitions: ts = pd.Timestamp("2025-03-08 08:00", tz="US/Eastern") ts + pd.offsets.Day(1) -This change fixes a long-standing bug in ``pd.date_range`` (:issue:`51716`, :issue:`35388`), but causes several +This change fixes a long-standing bug in :func:`date_range` (:issue:`51716`, :issue:`35388`), but causes several small behavior differences as collateral: - ``pd.offsets.Day(n)`` no longer compares as equal to ``pd.offsets.Hour(24*n)`` @@ -330,7 +330,7 @@ small behavior differences as collateral: - :meth:`tseries.frequencies.to_offset` on a :class:`Timedelta` object returns a :class:`offsets.Hour` object in cases where it used to return a :class:`Day` object. - Adding or subtracting a scalar from a timezone-aware :class:`DatetimeIndex` with a :class:`Day` ``freq`` no longer preserves that ``freq`` attribute. - Adding or subtracting a :class:`Day` with a :class:`Timedelta` is no longer supported. -- Adding or subtracting a :class:`Day` offset to a :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. +- Adding or subtracting a :class:`Day` offset to a timezone-aware :class:`Timestamp` or datetime-like may lead to an ambiguous or non-existent time, which will raise. .. 
_whatsnew_300.api_breaking.deps: From a41cdfd6c52f724926b1e9180cf53b689471dc00 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jul 2025 10:07:10 -0700 Subject: [PATCH 12/14] GH ref --- pandas/_libs/tslibs/offsets.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index b1bb5b1d27eb7..aa95143c6ae62 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -1246,7 +1246,7 @@ cdef class Day(SingleConstructorOffset): return str(self.n) + "D" return "D" - # Having this here isn't strictly-correct post-GH#??? + # Having this here isn't strictly-correct post-GH#61985 # but this gets called in timedelta.get_unit_for_round in cases where # Day unambiguously means 24h. @property From 198ec6c81f60e883f14f5de309bcdc870a532188 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Jul 2025 10:52:44 -0700 Subject: [PATCH 13/14] move nanos check --- pandas/_libs/tslibs/timedeltas.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 5310165290d65..c719219639689 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -2577,9 +2577,9 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? 
-1: from pandas._libs.tslibs.offsets import to_offset freq = to_offset(freq) - freq.nanos # raises on non-fixed freq if isinstance(freq, Day): # In the "round" context, Day unambiguously means 24h, not calendar-day freq = Timedelta(days=freq.n) - + else: + freq.nanos # raises on non-fixed freq return delta_to_nanoseconds(freq, creso) From 1325c3449699b68e4a283e40c63f08ebec0bc148 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 5 Aug 2025 15:03:35 -0700 Subject: [PATCH 14/14] Adjust resample expecteds --- pandas/core/resample.py | 23 ++++---------------- pandas/tests/resample/test_datetime_index.py | 18 ++++++++------- pandas/tests/resample/test_period_index.py | 5 +++-- 3 files changed, 17 insertions(+), 29 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 01bdaaf9b76a9..bb3f45e6fb7d0 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2631,7 +2631,7 @@ def _get_timestamp_range_edges( ------- A tuple of length 2, containing the adjusted pd.Timestamp objects. """ - if isinstance(freq, (Tick, Day)): + if isinstance(freq, Tick): index_tz = first.tz if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None): raise ValueError("The origin must have the same timezone as the index.") @@ -2640,27 +2640,15 @@ def _get_timestamp_range_edges( # resampling on the same kind of indexes on different timezones origin = Timestamp("1970-01-01", tz=index_tz) - if isinstance(freq, Day): - # _adjust_dates_anchored assumes 'D' means 24h, but first/last - # might contain a DST transition (23h, 24h, or 25h). 
- # So "pretend" the dates are naive when adjusting the endpoints - first = first.tz_localize(None) - last = last.tz_localize(None) - if isinstance(origin, Timestamp): - origin = origin.tz_localize(None) - first, last = _adjust_dates_anchored( first, last, - cast("Day | Tick", freq), + freq, closed=closed, origin=origin, offset=offset, unit=unit, ) - if isinstance(freq, Day): - first = first.tz_localize(index_tz) - last = last.tz_localize(index_tz, nonexistent="shift_forward") else: first = first.normalize() last = last.normalize() @@ -2752,7 +2740,7 @@ def _insert_nat_bin( def _adjust_dates_anchored( first: Timestamp, last: Timestamp, - freq: Tick | Day, + freq: Tick, closed: Literal["right", "left"] = "right", origin: TimeGrouperOrigin = "start_day", offset: Timedelta | None = None, @@ -2768,10 +2756,7 @@ def _adjust_dates_anchored( if offset is not None: offset = offset.as_unit(unit) - if isinstance(freq, Day): - freq_value = Timedelta(days=freq.n).as_unit(unit)._value - else: - freq_value = Timedelta(freq).as_unit(unit)._value + freq_value = Timedelta(freq).as_unit(unit)._value origin_timestamp = 0 # origin == "epoch" if origin == "start_day": diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index f6719202db4d4..a7539f4bc0dc8 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -935,8 +935,9 @@ def _create_series(values, timestamps, freq="D"): rng = date_range(start, end, freq="1h").as_unit(unit) ts = Series(np.ones(len(rng)), index=rng) - expected_ts = ["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"] - expected = _create_series([23.0, 2.0], expected_ts) + # GH#61985 changed this to behave like "B" rather than "24h" + expected_ts = ["2013-11-03 00:00-05:00"] + expected = _create_series([25.0], expected_ts) result = ts.resample("D", origin="start", offset="-2h").sum() tm.assert_series_equal(result, expected) @@ -945,18 +946,19 @@ def 
_create_series(values, timestamps, freq="D"): result = ts.resample("24h", origin="start", offset="-2h").sum() tm.assert_series_equal(result, expected) - expected_ts = ["2013-11-02 02:00-05:00", "2013-11-03 02:00-06:00"] - expected = _create_series([3.0, 22.0], expected_ts) + # GH#61985 changed this to behave like "B" rather than "24h" + expected_ts = ["2013-11-03 00:00-05:00"] + expected = _create_series([25.0], expected_ts) result = ts.resample("D", origin="start", offset="2h").sum() tm.assert_series_equal(result, expected) - expected_ts = ["2013-11-02 23:00-05:00", "2013-11-03 23:00-06:00"] - expected = _create_series([24.0, 1.0], expected_ts) + expected_ts = ["2013-11-03 00:00-05:00"] + expected = _create_series([25.0], expected_ts) result = ts.resample("D", origin="start", offset="-1h").sum() tm.assert_series_equal(result, expected) - expected_ts = ["2013-11-02 01:00-05:00", "2013-11-03 01:00:00-0500"] - expected = _create_series([1.0, 24.0], expected_ts) + expected_ts = ["2013-11-03 00:00-05:00"] + expected = _create_series([25.0], expected_ts) result = ts.resample("D", origin="start", offset="1h").sum() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index e17529dfab00c..bcace30696662 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -560,9 +560,10 @@ def test_resample_tz_localized2(self): ) s = Series([1, 2], index=idx) + # GH#61985 changed this to behave like "B" rather than "24h" result = s.resample("D", closed="right", label="right").mean() - ex_index = date_range("2001-09-21", periods=1, freq="D", tz="Australia/Sydney") - expected = Series([1.5], index=ex_index) + ex_index = date_range("2001-09-20", periods=2, freq="D", tz="Australia/Sydney") + expected = Series([np.nan, 1.5], index=ex_index) tm.assert_series_equal(result, expected)