Skip to content

Commit 6b7f4bd

Browse files
committed
Port pandas 3.0 change to Day offset to cftime version
1 parent 4d05aa8 commit 6b7f4bd

File tree

7 files changed

+137
-30
lines changed

7 files changed

+137
-30
lines changed

doc/whats-new.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@ New Features
1616

1717
Breaking changes
1818
~~~~~~~~~~~~~~~~
19+
- Following pandas 3.0 (`pandas-dev/pandas#61985
20+
<https://github.com/pandas-dev/pandas/pull/61985>`_), ``Day`` is no longer
21+
considered a ``Tick``-like frequency. Therefore non-``None`` values of
22+
``offset`` and non-``"start_day"`` values of ``origin`` will have no effect
23+
when resampling to a daily frequency for objects indexed by a
24+
:py:class:`xarray.CFTimeIndex`. As in `pandas-dev/pandas#62101
25+
<https://github.com/pandas-dev/pandas/pull/62101>`_ warnings will be emitted
26+
if non-default values are provided in this context (:issue:`10640`,
27+
:pull:`10650`). By `Spencer Clark <https://github.com/spencerkclark>`_.
1928

2029

2130
Deprecations

xarray/coding/cftime_offsets.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -609,14 +609,19 @@ def rollback(self, date):
609609
return date - YearEnd(month=self.month)
610610

611611

612-
class Day(Tick):
613-
_freq = "D"
612+
class Day(BaseCFTimeOffset):
613+
"""Day offset following definition in pandas/_libs/tslibs/offsets.pyx"""
614614

615-
def as_timedelta(self) -> timedelta:
616-
return timedelta(days=self.n)
615+
_freq = "D"
617616

618617
def __apply__(self, other):
619-
return other + self.as_timedelta()
618+
if isinstance(other, Day):
619+
return Day(self.n + other.n)
620+
else:
621+
return other + timedelta(days=self.n)
622+
623+
def onOffset(self, date) -> bool:
624+
return True
620625

621626

622627
class Hour(Tick):
@@ -719,7 +724,7 @@ def _generate_anchored_offsets(
719724

720725
# pandas defines these offsets as "Tick" objects, which for instance have
721726
# distinct behavior from monthly or longer frequencies in resample.
722-
CFTIME_TICKS = (Day, Hour, Minute, Second)
727+
CFTIME_TICKS = (Hour, Minute, Second)
723728

724729

725730
def _generate_anchored_deprecated_frequencies(

xarray/coding/cftimeindex.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -707,16 +707,22 @@ def freq(self):
707707

708708
def _round_via_method(self, freq, method):
709709
"""Round dates using a specified method."""
710-
from xarray.coding.cftime_offsets import CFTIME_TICKS, to_offset
710+
from xarray.coding.cftime_offsets import CFTIME_TICKS, Day, to_offset
711711

712712
if not self._data.size:
713713
return CFTimeIndex(np.array(self))
714714

715715
offset = to_offset(freq)
716-
if not isinstance(offset, CFTIME_TICKS):
716+
if isinstance(offset, Day):
717+
# Following pandas, "In the 'round' context, Day unambiguously
718+
# means 24h, not calendar-day"
719+
offset_as_timedelta = timedelta(days=offset.n)
720+
elif isinstance(offset, CFTIME_TICKS):
721+
offset_as_timedelta = offset.as_timedelta()
722+
else:
717723
raise ValueError(f"{offset} is a non-fixed frequency")
718724

719-
unit = _total_microseconds(offset.as_timedelta())
725+
unit = _total_microseconds(offset_as_timedelta)
720726
values = self.asi8
721727
rounded = method(values, unit)
722728
return _cftimeindex_from_i8(rounded, self.date_type, self.name)

xarray/core/resample_cftime.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
import pandas as pd
4646

4747
from xarray.coding.cftime_offsets import (
48+
CFTIME_TICKS,
4849
BaseCFTimeOffset,
4950
MonthEnd,
5051
QuarterEnd,
@@ -56,6 +57,7 @@
5657
)
5758
from xarray.coding.cftimeindex import CFTimeIndex
5859
from xarray.core.types import SideOptions
60+
from xarray.core.utils import emit_user_level_warning
5961

6062
if typing.TYPE_CHECKING:
6163
from xarray.core.types import CFTimeDatetime, ResampleCompatible
@@ -84,6 +86,22 @@ def __init__(
8486
self.freq = to_offset(freq)
8587
self.origin = origin
8688

89+
if not isinstance(self.freq, CFTIME_TICKS):
90+
if offset is not None:
91+
message = (
92+
"The 'offset' keyword does not take effect when "
93+
"resampling with a 'freq' that is not Tick-like (h, m, s, "
94+
"ms, us)"
95+
)
96+
emit_user_level_warning(message, category=RuntimeWarning)
97+
if origin != "start_day":
98+
message = (
99+
"The 'origin' keyword does not take effect when "
100+
"resampling with a 'freq' that is not Tick-like (h, m, s, "
101+
"ms, us)"
102+
)
103+
emit_user_level_warning(message, category=RuntimeWarning)
104+
87105
if isinstance(self.freq, MonthEnd | QuarterEnd | YearEnd) or self.origin in [
88106
"end",
89107
"end_day",

xarray/tests/test_cftime_offsets.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,6 @@ def test_eq(a, b):
445445
(Second(), 3, Second(n=3)),
446446
(Millisecond(), 3, Millisecond(n=3)),
447447
(Microsecond(), 3, Microsecond(n=3)),
448-
(Day(), 0.5, Hour(n=12)),
449448
(Hour(), 0.5, Minute(n=30)),
450449
(Hour(), -0.5, Minute(n=-30)),
451450
(Minute(), 0.5, Second(n=30)),
@@ -534,6 +533,20 @@ def test_add_sub_monthly(offset, expected_date_args, calendar):
534533
assert result == expected
535534

536535

536+
def test_add_daily_offsets() -> None:
537+
offset = Day(n=2)
538+
expected = Day(n=4)
539+
result = offset + offset
540+
assert result == expected
541+
542+
543+
def test_subtract_daily_offsets() -> None:
544+
offset = Day(n=2)
545+
expected = Day(n=0)
546+
result = offset - offset
547+
assert result == expected
548+
549+
537550
@pytest.mark.parametrize(("offset", "expected_date_args"), _ADD_TESTS, ids=_id_func)
538551
def test_radd_sub_monthly(offset, expected_date_args, calendar):
539552
date_type = get_date_type(calendar)

xarray/tests/test_cftimeindex.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,6 @@ def test_cftimeindex_add_timedeltaindex(calendar) -> None:
830830
@pytest.mark.parametrize(
831831
"freq,units",
832832
[
833-
("D", "D"),
834833
("h", "h"),
835834
("min", "min"),
836835
("s", "s"),

xarray/tests/test_cftimeindex_resample.py

Lines changed: 76 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,17 @@
66
import numpy as np
77
import pandas as pd
88
import pytest
9-
from packaging.version import Version
109

1110
import xarray as xr
12-
from xarray.coding.cftime_offsets import _new_to_legacy_freq
11+
from xarray.coding.cftime_offsets import (
12+
CFTIME_TICKS,
13+
Day,
14+
_new_to_legacy_freq,
15+
to_offset,
16+
)
1317
from xarray.coding.cftimeindex import CFTimeIndex
1418
from xarray.core.resample_cftime import CFTimeGrouper
19+
from xarray.tests import has_pandas_3
1520

1621
cftime = pytest.importorskip("cftime")
1722

@@ -54,6 +59,20 @@
5459
]
5560

5661

62+
def has_tick_resample_freq(freqs):
63+
resample_freq, _ = freqs
64+
resample_freq_as_offset = to_offset(resample_freq)
65+
return isinstance(resample_freq_as_offset, CFTIME_TICKS)
66+
67+
68+
def has_non_tick_resample_freq(freqs):
69+
return not has_tick_resample_freq(freqs)
70+
71+
72+
FREQS_WITH_TICK_RESAMPLE_FREQ = list(filter(has_tick_resample_freq, FREQS))
73+
FREQS_WITH_NON_TICK_RESAMPLE_FREQ = list(filter(has_non_tick_resample_freq, FREQS))
74+
75+
5776
def compare_against_pandas(
5877
da_datetimeindex,
5978
da_cftimeindex,
@@ -110,22 +129,14 @@ def da(index) -> xr.DataArray:
110129
)
111130

112131

113-
@pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x))
132+
@pytest.mark.parametrize(
133+
"freqs", FREQS_WITH_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x)
134+
)
114135
@pytest.mark.parametrize("closed", [None, "left", "right"])
115136
@pytest.mark.parametrize("label", [None, "left", "right"])
116137
@pytest.mark.parametrize("offset", [None, "5s"], ids=lambda x: f"{x}")
117-
def test_resample(freqs, closed, label, offset) -> None:
138+
def test_resample_with_tick_resample_freq(freqs, closed, label, offset) -> None:
118139
initial_freq, resample_freq = freqs
119-
if (
120-
resample_freq == "4001D"
121-
and closed == "right"
122-
and Version(pd.__version__) < Version("2.2")
123-
):
124-
pytest.skip(
125-
"Pandas fixed a bug in this test case in version 2.2, which we "
126-
"ported to xarray, so this test no longer produces the same "
127-
"result as pandas for earlier pandas versions."
128-
)
129140
start = "2000-01-01T12:07:01"
130141
origin = "start"
131142

@@ -149,6 +160,43 @@ def test_resample(freqs, closed, label, offset) -> None:
149160
)
150161

151162

163+
@pytest.mark.parametrize(
164+
"freqs", FREQS_WITH_NON_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x)
165+
)
166+
@pytest.mark.parametrize("closed", [None, "left", "right"])
167+
@pytest.mark.parametrize("label", [None, "left", "right"])
168+
def test_resample_with_non_tick_resample_freq(freqs, closed, label) -> None:
169+
initial_freq, resample_freq = freqs
170+
resample_freq_as_offset = to_offset(resample_freq)
171+
if isinstance(resample_freq_as_offset, Day) and not has_pandas_3:
172+
pytest.skip("Only valid for pandas >= 3.0")
173+
start = "2000-01-01T12:07:01"
174+
175+
# Set offset and origin to their default values since they have no effect
176+
# on resampling data with a non-tick resample frequency.
177+
offset = None
178+
origin = "start_day"
179+
180+
datetime_index = pd.date_range(
181+
start=start, periods=5, freq=_new_to_legacy_freq(initial_freq)
182+
)
183+
cftime_index = xr.date_range(
184+
start=start, periods=5, freq=initial_freq, use_cftime=True
185+
)
186+
da_datetimeindex = da(datetime_index)
187+
da_cftimeindex = da(cftime_index)
188+
189+
compare_against_pandas(
190+
da_datetimeindex,
191+
da_cftimeindex,
192+
resample_freq,
193+
closed=closed,
194+
label=label,
195+
offset=offset,
196+
origin=origin,
197+
)
198+
199+
152200
@pytest.mark.parametrize(
153201
("freq", "expected"),
154202
[
@@ -225,19 +273,28 @@ def test_origin(closed, origin) -> None:
225273

226274
@pytest.mark.parametrize("offset", ["foo", "5MS", 10])
227275
def test_invalid_offset_error(offset: str | int) -> None:
228-
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
276+
cftime_index = xr.date_range("2000", periods=5, freq="h", use_cftime=True)
229277
da_cftime = da(cftime_index)
230278
with pytest.raises(ValueError, match="offset must be"):
231-
da_cftime.resample(time="2D", offset=offset) # type: ignore[arg-type]
279+
da_cftime.resample(time="2h", offset=offset) # type: ignore[arg-type]
232280

233281

234282
def test_timedelta_offset() -> None:
235283
timedelta = datetime.timedelta(seconds=5)
236284
string = "5s"
237285

238-
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
286+
cftime_index = xr.date_range("2000", periods=5, freq="h", use_cftime=True)
239287
da_cftime = da(cftime_index)
240288

241-
timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean()
242-
string_result = da_cftime.resample(time="2D", offset=string).mean()
289+
timedelta_result = da_cftime.resample(time="2h", offset=timedelta).mean()
290+
string_result = da_cftime.resample(time="2h", offset=string).mean()
243291
xr.testing.assert_identical(timedelta_result, string_result)
292+
293+
294+
@pytest.mark.parametrize(("option", "value"), [("offset", "5s"), ("origin", "start")])
295+
def test_non_tick_option_warning(option, value) -> None:
296+
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
297+
da_cftime = da(cftime_index)
298+
kwargs = {option: value}
299+
with pytest.warns(RuntimeWarning, match=option):
300+
da_cftime.resample(time="ME", **kwargs)

0 commit comments

Comments
 (0)