diff --git a/doc/whats-new.rst b/doc/whats-new.rst index aa8467cf260..7d452d74b6f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,15 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Following pandas 3.0 (`pandas-dev/pandas#61985 + <https://github.com/pandas-dev/pandas/pull/61985>`_), ``Day`` is no longer + considered a ``Tick``-like frequency. Therefore non-``None`` values of + ``offset`` and non-``"start_day"`` values of ``origin`` will have no effect + when resampling to a daily frequency for objects indexed by a + :py:class:`xarray.CFTimeIndex`. As in `pandas-dev/pandas#62101 + <https://github.com/pandas-dev/pandas/pull/62101>`_ warnings will be emitted + if non-default values are provided in this context (:issue:`10640`, + :pull:`10650`). By `Spencer Clark <https://github.com/spencerkclark>`_. Deprecations diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index e8d407b282c..e0038796c9b 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -609,14 +609,19 @@ def rollback(self, date): return date - YearEnd(month=self.month) -class Day(Tick): - _freq = "D" +class Day(BaseCFTimeOffset): + """Day offset following definition in pandas/_libs/tslibs/offsets.pyx""" - def as_timedelta(self) -> timedelta: - return timedelta(days=self.n) + _freq = "D" def __apply__(self, other): - return other + self.as_timedelta() + if isinstance(other, Day): + return Day(self.n + other.n) + else: + return other + timedelta(days=self.n) + + def onOffset(self, date) -> bool: + return True class Hour(Tick): @@ -718,8 +723,8 @@ def _generate_anchored_offsets( # pandas defines these offsets as "Tick" objects, which for instance have -# distinct behavior from monthly or longer frequencies in resample. -CFTIME_TICKS = (Day, Hour, Minute, Second) +# distinct behavior from daily or longer frequencies in resample. 
+CFTIME_TICKS = (Hour, Minute, Second) def _generate_anchored_deprecated_frequencies( @@ -801,16 +806,13 @@ def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick: "nanoseconds to 'CFTimeOffset' object" ) if delta.microseconds == 0: - if delta.seconds == 0: - return Day(n=delta.days) + seconds = delta.days * 86400 + delta.seconds + if seconds % 3600 == 0: + return Hour(n=seconds // 3600) + elif seconds % 60 == 0: + return Minute(n=seconds // 60) else: - seconds = delta.days * 86400 + delta.seconds - if seconds % 3600 == 0: - return Hour(n=seconds // 3600) - elif seconds % 60 == 0: - return Minute(n=seconds // 60) - else: - return Second(n=seconds) + return Second(n=seconds) # Regardless of the days and seconds this will always be a Millisecond # or Microsecond object elif delta.microseconds % 1_000 == 0: diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index 210bbf263b1..ed6b004ebc6 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -490,7 +490,7 @@ def shift( # type: ignore[override] # freq is typed Any, we are more precise >>> index.shift(1, "ME") CFTimeIndex([2000-02-29 00:00:00], dtype='object', length=1, calendar='standard', freq=None) - >>> index.shift(1.5, "D") + >>> index.shift(1.5, "24h") CFTimeIndex([2000-02-01 12:00:00], dtype='object', length=1, calendar='standard', freq=None) """ @@ -707,16 +707,22 @@ def freq(self): def _round_via_method(self, freq, method): """Round dates using a specified method.""" - from xarray.coding.cftime_offsets import CFTIME_TICKS, to_offset + from xarray.coding.cftime_offsets import CFTIME_TICKS, Day, to_offset if not self._data.size: return CFTimeIndex(np.array(self)) offset = to_offset(freq) - if not isinstance(offset, CFTIME_TICKS): + if isinstance(offset, Day): + # Following pandas, "In the 'round' context, Day unambiguously + # means 24h, not calendar-day" + offset_as_timedelta = timedelta(days=offset.n) + elif isinstance(offset, CFTIME_TICKS): + 
offset_as_timedelta = offset.as_timedelta() + else: raise ValueError(f"{offset} is a non-fixed frequency") - unit = _total_microseconds(offset.as_timedelta()) + unit = _total_microseconds(offset_as_timedelta) values = self.asi8 rounded = method(values, unit) return _cftimeindex_from_i8(rounded, self.date_type, self.name) diff --git a/xarray/core/resample_cftime.py b/xarray/core/resample_cftime.py index 9b636f6fc81..82e99fc9247 100644 --- a/xarray/core/resample_cftime.py +++ b/xarray/core/resample_cftime.py @@ -45,6 +45,7 @@ import pandas as pd from xarray.coding.cftime_offsets import ( + CFTIME_TICKS, BaseCFTimeOffset, MonthEnd, QuarterEnd, @@ -56,6 +57,7 @@ ) from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.types import SideOptions +from xarray.core.utils import emit_user_level_warning if typing.TYPE_CHECKING: from xarray.core.types import CFTimeDatetime, ResampleCompatible @@ -84,6 +86,22 @@ def __init__( self.freq = to_offset(freq) self.origin = origin + if not isinstance(self.freq, CFTIME_TICKS): + if offset is not None: + message = ( + "The 'offset' keyword does not take effect when " + "resampling with a 'freq' that is not Tick-like (h, m, s, " + "ms, us)" + ) + emit_user_level_warning(message, category=RuntimeWarning) + if origin != "start_day": + message = ( + "The 'origin' keyword does not take effect when " + "resampling with a 'freq' that is not Tick-like (h, m, s, " + "ms, us)" + ) + emit_user_level_warning(message, category=RuntimeWarning) + if isinstance(self.freq, MonthEnd | QuarterEnd | YearEnd) or self.origin in [ "end", "end_day", diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 715eb712bdf..5c8de46664f 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ -445,7 +445,6 @@ def test_eq(a, b): (Second(), 3, Second(n=3)), (Millisecond(), 3, Millisecond(n=3)), (Microsecond(), 3, Microsecond(n=3)), - (Day(), 0.5, Hour(n=12)), (Hour(), 0.5, 
Minute(n=30)), (Hour(), -0.5, Minute(n=-30)), (Minute(), 0.5, Second(n=30)), @@ -472,7 +471,15 @@ def test_mul_float_multiple_next_higher_resolution(): @pytest.mark.parametrize( "offset", - [YearBegin(), YearEnd(), QuarterBegin(), QuarterEnd(), MonthBegin(), MonthEnd()], + [ + YearBegin(), + YearEnd(), + QuarterBegin(), + QuarterEnd(), + MonthBegin(), + MonthEnd(), + Day(), + ], ids=_id_func, ) def test_nonTick_offset_multiplied_float_error(offset): @@ -534,6 +541,20 @@ def test_add_sub_monthly(offset, expected_date_args, calendar): assert result == expected +def test_add_daily_offsets() -> None: + offset = Day(n=2) + expected = Day(n=4) + result = offset + offset + assert result == expected + + +def test_subtract_daily_offsets() -> None: + offset = Day(n=2) + expected = Day(n=0) + result = offset - offset + assert result == expected + + @pytest.mark.parametrize(("offset", "expected_date_args"), _ADD_TESTS, ids=_id_func) def test_radd_sub_monthly(offset, expected_date_args, calendar): date_type = get_date_type(calendar) diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index a6b41ff9353..5dd541cc172 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -830,7 +830,6 @@ def test_cftimeindex_add_timedeltaindex(calendar) -> None: @pytest.mark.parametrize( "freq,units", [ - ("D", "D"), ("h", "h"), ("min", "min"), ("s", "s"), @@ -856,7 +855,7 @@ def test_cftimeindex_shift_float_us() -> None: @requires_cftime -@pytest.mark.parametrize("freq", ["YS", "YE", "QS", "QE", "MS", "ME"]) +@pytest.mark.parametrize("freq", ["YS", "YE", "QS", "QE", "MS", "ME", "D"]) def test_cftimeindex_shift_float_fails_for_non_tick_freqs(freq) -> None: a = xr.date_range("2000", periods=3, freq="D", use_cftime=True) with pytest.raises(TypeError, match="unsupported operand type"): diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index fa896da0ed6..d5dbe61aee9 100644 --- 
a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -6,12 +6,17 @@ import numpy as np import pandas as pd import pytest -from packaging.version import Version import xarray as xr -from xarray.coding.cftime_offsets import _new_to_legacy_freq +from xarray.coding.cftime_offsets import ( + CFTIME_TICKS, + Day, + _new_to_legacy_freq, + to_offset, +) from xarray.coding.cftimeindex import CFTimeIndex from xarray.core.resample_cftime import CFTimeGrouper +from xarray.tests import has_pandas_3 cftime = pytest.importorskip("cftime") @@ -54,6 +59,20 @@ ] +def has_tick_resample_freq(freqs): + resample_freq, _ = freqs + resample_freq_as_offset = to_offset(resample_freq) + return isinstance(resample_freq_as_offset, CFTIME_TICKS) + + +def has_non_tick_resample_freq(freqs): + return not has_tick_resample_freq(freqs) + + +FREQS_WITH_TICK_RESAMPLE_FREQ = list(filter(has_tick_resample_freq, FREQS)) +FREQS_WITH_NON_TICK_RESAMPLE_FREQ = list(filter(has_non_tick_resample_freq, FREQS)) + + def compare_against_pandas( da_datetimeindex, da_cftimeindex, @@ -110,22 +129,14 @@ def da(index) -> xr.DataArray: ) -@pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x)) +@pytest.mark.parametrize( + "freqs", FREQS_WITH_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x) +) @pytest.mark.parametrize("closed", [None, "left", "right"]) @pytest.mark.parametrize("label", [None, "left", "right"]) @pytest.mark.parametrize("offset", [None, "5s"], ids=lambda x: f"{x}") -def test_resample(freqs, closed, label, offset) -> None: +def test_resample_with_tick_resample_freq(freqs, closed, label, offset) -> None: initial_freq, resample_freq = freqs - if ( - resample_freq == "4001D" - and closed == "right" - and Version(pd.__version__) < Version("2.2") - ): - pytest.skip( - "Pandas fixed a bug in this test case in version 2.2, which we " - "ported to xarray, so this test no longer produces the same " - "result as pandas for earlier pandas versions." 
- ) start = "2000-01-01T12:07:01" origin = "start" @@ -149,6 +160,43 @@ def test_resample(freqs, closed, label, offset) -> None: ) +@pytest.mark.parametrize( + "freqs", FREQS_WITH_NON_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x) +) +@pytest.mark.parametrize("closed", [None, "left", "right"]) +@pytest.mark.parametrize("label", [None, "left", "right"]) +def test_resample_with_non_tick_resample_freq(freqs, closed, label) -> None: + initial_freq, resample_freq = freqs + resample_freq_as_offset = to_offset(resample_freq) + if isinstance(resample_freq_as_offset, Day) and not has_pandas_3: + pytest.skip("Only valid for pandas >= 3.0") + start = "2000-01-01T12:07:01" + + # Set offset and origin to their default values since they have no effect + # on resampling data with a non-tick resample frequency. + offset = None + origin = "start_day" + + datetime_index = pd.date_range( + start=start, periods=5, freq=_new_to_legacy_freq(initial_freq) + ) + cftime_index = xr.date_range( + start=start, periods=5, freq=initial_freq, use_cftime=True + ) + da_datetimeindex = da(datetime_index) + da_cftimeindex = da(cftime_index) + + compare_against_pandas( + da_datetimeindex, + da_cftimeindex, + resample_freq, + closed=closed, + label=label, + offset=offset, + origin=origin, + ) + + @pytest.mark.parametrize( ("freq", "expected"), [ @@ -228,7 +276,7 @@ def test_invalid_offset_error(offset: str | int) -> None: cftime_index = xr.date_range("2000", periods=5, use_cftime=True) da_cftime = da(cftime_index) with pytest.raises(ValueError, match="offset must be"): - da_cftime.resample(time="2D", offset=offset) # type: ignore[arg-type] + da_cftime.resample(time="2h", offset=offset) # type: ignore[arg-type] def test_timedelta_offset() -> None: @@ -238,6 +286,15 @@ def test_timedelta_offset() -> None: cftime_index = xr.date_range("2000", periods=5, use_cftime=True) da_cftime = da(cftime_index) - timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean() - string_result = 
da_cftime.resample(time="2D", offset=string).mean() + timedelta_result = da_cftime.resample(time="2h", offset=timedelta).mean() + string_result = da_cftime.resample(time="2h", offset=string).mean() xr.testing.assert_identical(timedelta_result, string_result) + + +@pytest.mark.parametrize(("option", "value"), [("offset", "5s"), ("origin", "start")]) +def test_non_tick_option_warning(option, value) -> None: + cftime_index = xr.date_range("2000", periods=5, use_cftime=True) + da_cftime = da(cftime_index) + kwargs = {option: value} + with pytest.warns(RuntimeWarning, match=option): + da_cftime.resample(time="ME", **kwargs)