Skip to content

Following pandas 3.0, make Day cftime offset non-Tick-like #10650

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,15 @@ New Features

Breaking changes
~~~~~~~~~~~~~~~~
- Following pandas 3.0 (`pandas-dev/pandas#61985
<https://github.com/pandas-dev/pandas/pull/61985>`_), ``Day`` is no longer
considered a ``Tick``-like frequency. Therefore, non-``None`` values of
``offset`` and non-``"start_day"`` values of ``origin`` will have no effect
when resampling to a daily frequency for objects indexed by a
:py:class:`xarray.CFTimeIndex`. As in `pandas-dev/pandas#62101
<https://github.com/pandas-dev/pandas/pull/62101>`_, warnings will be emitted
if non-default values are provided in this context (:issue:`10640`,
:pull:`10650`). By `Spencer Clark <https://github.com/spencerkclark>`_.


Deprecations
Expand Down
32 changes: 17 additions & 15 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,14 +609,19 @@ def rollback(self, date):
return date - YearEnd(month=self.month)


class Day(Tick):
_freq = "D"
class Day(BaseCFTimeOffset):
"""Day offset following definition in pandas/_libs/tslibs/offsets.pyx"""

def as_timedelta(self) -> timedelta:
return timedelta(days=self.n)
_freq = "D"

def __apply__(self, other):
return other + self.as_timedelta()
if isinstance(other, Day):
return Day(self.n + other.n)
else:
return other + timedelta(days=self.n)

def onOffset(self, date) -> bool:
return True


class Hour(Tick):
Expand Down Expand Up @@ -719,7 +724,7 @@ def _generate_anchored_offsets(

# pandas defines these offsets as "Tick" objects, which for instance have
# distinct behavior from monthly or longer frequencies in resample.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# distinct behavior from monthly or longer frequencies in resample.
# distinct behavior from daily or longer frequencies in resample.

CFTIME_TICKS = (Day, Hour, Minute, Second)
CFTIME_TICKS = (Hour, Minute, Second)


def _generate_anchored_deprecated_frequencies(
Expand Down Expand Up @@ -801,16 +806,13 @@ def delta_to_tick(delta: timedelta | pd.Timedelta) -> Tick:
"nanoseconds to 'CFTimeOffset' object"
)
if delta.microseconds == 0:
if delta.seconds == 0:
return Day(n=delta.days)
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(n=seconds // 3600)
elif seconds % 60 == 0:
return Minute(n=seconds // 60)
else:
seconds = delta.days * 86400 + delta.seconds
if seconds % 3600 == 0:
return Hour(n=seconds // 3600)
elif seconds % 60 == 0:
return Minute(n=seconds // 60)
else:
return Second(n=seconds)
return Second(n=seconds)
# Regardless of the days and seconds this will always be a Millisecond
# or Microsecond object
elif delta.microseconds % 1_000 == 0:
Expand Down
14 changes: 10 additions & 4 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def shift( # type: ignore[override] # freq is typed Any, we are more precise
>>> index.shift(1, "ME")
CFTimeIndex([2000-02-29 00:00:00],
dtype='object', length=1, calendar='standard', freq=None)
>>> index.shift(1.5, "D")
>>> index.shift(1.5, "24h")
CFTimeIndex([2000-02-01 12:00:00],
dtype='object', length=1, calendar='standard', freq=None)
"""
Expand Down Expand Up @@ -707,16 +707,22 @@ def freq(self):

def _round_via_method(self, freq, method):
"""Round dates using a specified method."""
from xarray.coding.cftime_offsets import CFTIME_TICKS, to_offset
from xarray.coding.cftime_offsets import CFTIME_TICKS, Day, to_offset

if not self._data.size:
return CFTimeIndex(np.array(self))

offset = to_offset(freq)
if not isinstance(offset, CFTIME_TICKS):
if isinstance(offset, Day):
# Following pandas, "In the 'round' context, Day unambiguously
# means 24h, not calendar-day"
offset_as_timedelta = timedelta(days=offset.n)
elif isinstance(offset, CFTIME_TICKS):
offset_as_timedelta = offset.as_timedelta()
else:
raise ValueError(f"{offset} is a non-fixed frequency")

unit = _total_microseconds(offset.as_timedelta())
unit = _total_microseconds(offset_as_timedelta)
values = self.asi8
rounded = method(values, unit)
return _cftimeindex_from_i8(rounded, self.date_type, self.name)
Expand Down
18 changes: 18 additions & 0 deletions xarray/core/resample_cftime.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import pandas as pd

from xarray.coding.cftime_offsets import (
CFTIME_TICKS,
BaseCFTimeOffset,
MonthEnd,
QuarterEnd,
Expand All @@ -56,6 +57,7 @@
)
from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core.types import SideOptions
from xarray.core.utils import emit_user_level_warning

if typing.TYPE_CHECKING:
from xarray.core.types import CFTimeDatetime, ResampleCompatible
Expand Down Expand Up @@ -84,6 +86,22 @@ def __init__(
self.freq = to_offset(freq)
self.origin = origin

if not isinstance(self.freq, CFTIME_TICKS):
if offset is not None:
message = (
"The 'offset' keyword does not take effect when "
"resampling with a 'freq' that is not Tick-like (h, m, s, "
"ms, us)"
)
emit_user_level_warning(message, category=RuntimeWarning)
if origin != "start_day":
message = (
"The 'origin' keyword does not take effect when "
"resampling with a 'freq' that is not Tick-like (h, m, s, "
"ms, us)"
)
emit_user_level_warning(message, category=RuntimeWarning)

if isinstance(self.freq, MonthEnd | QuarterEnd | YearEnd) or self.origin in [
"end",
"end_day",
Expand Down
25 changes: 23 additions & 2 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,6 @@ def test_eq(a, b):
(Second(), 3, Second(n=3)),
(Millisecond(), 3, Millisecond(n=3)),
(Microsecond(), 3, Microsecond(n=3)),
(Day(), 0.5, Hour(n=12)),
Copy link
Member Author

@spencerkclark spencerkclark Aug 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that Day is non-Tick-like, multiplying by a float is no longer valid in pandas:

>>> import pandas as pd
>>> 2.0 * pd.tseries.frequencies.Day(2)
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    2.0 * pd.tseries.frequencies.Day(2)
    ~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
TypeError: unsupported operand type(s) for *: 'float' and 'pandas._libs.tslibs.offsets.Day'

A similar error is raised now for the cftime version of this offset. This is tested in b87bb35.

(Hour(), 0.5, Minute(n=30)),
(Hour(), -0.5, Minute(n=-30)),
(Minute(), 0.5, Second(n=30)),
Expand All @@ -472,7 +471,15 @@ def test_mul_float_multiple_next_higher_resolution():

@pytest.mark.parametrize(
"offset",
[YearBegin(), YearEnd(), QuarterBegin(), QuarterEnd(), MonthBegin(), MonthEnd()],
[
YearBegin(),
YearEnd(),
QuarterBegin(),
QuarterEnd(),
MonthBegin(),
MonthEnd(),
Day(),
],
ids=_id_func,
)
def test_nonTick_offset_multiplied_float_error(offset):
Expand Down Expand Up @@ -534,6 +541,20 @@ def test_add_sub_monthly(offset, expected_date_args, calendar):
assert result == expected


def test_add_daily_offsets() -> None:
offset = Day(n=2)
expected = Day(n=4)
result = offset + offset
assert result == expected


def test_subtract_daily_offsets() -> None:
offset = Day(n=2)
expected = Day(n=0)
result = offset - offset
assert result == expected


@pytest.mark.parametrize(("offset", "expected_date_args"), _ADD_TESTS, ids=_id_func)
def test_radd_sub_monthly(offset, expected_date_args, calendar):
date_type = get_date_type(calendar)
Expand Down
3 changes: 1 addition & 2 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,7 +830,6 @@ def test_cftimeindex_add_timedeltaindex(calendar) -> None:
@pytest.mark.parametrize(
"freq,units",
[
("D", "D"),
Copy link
Member Author

@spencerkclark spencerkclark Aug 17, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that Day is non-Tick-like, multiplying by a float is no longer valid in pandas, so shift with a float value and Day offset will raise an error:

>>> import pandas as pd
>>> times = pd.date_range("2000", periods=5)
>>> times.shift(0.5, "D")
Traceback (most recent call last):
  File "<python-input-2>", line 1, in <module>
    times.shift(0.5, "D")
    ~~~~~~~~~~~^^^^^^^^^^
  File "/Users/spencer/software/pandas/pandas/core/indexes/datetimelike.py", line 514, in shift
    start = self[0] + periods * self.freq
                      ~~~~~~~~^~~~~~~~~~~
TypeError: unsupported operand type(s) for *: 'float' and 'pandas._libs.tslibs.offsets.Day'

A similar error is raised now for attempting to shift a CFTimeIndex in this way. This is tested in b87bb35.

("h", "h"),
("min", "min"),
("s", "s"),
Expand All @@ -856,7 +855,7 @@ def test_cftimeindex_shift_float_us() -> None:


@requires_cftime
@pytest.mark.parametrize("freq", ["YS", "YE", "QS", "QE", "MS", "ME"])
@pytest.mark.parametrize("freq", ["YS", "YE", "QS", "QE", "MS", "ME", "D"])
def test_cftimeindex_shift_float_fails_for_non_tick_freqs(freq) -> None:
a = xr.date_range("2000", periods=3, freq="D", use_cftime=True)
with pytest.raises(TypeError, match="unsupported operand type"):
Expand Down
91 changes: 74 additions & 17 deletions xarray/tests/test_cftimeindex_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
import numpy as np
import pandas as pd
import pytest
from packaging.version import Version

import xarray as xr
from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.coding.cftime_offsets import (
CFTIME_TICKS,
Day,
_new_to_legacy_freq,
to_offset,
)
from xarray.coding.cftimeindex import CFTimeIndex
from xarray.core.resample_cftime import CFTimeGrouper
from xarray.tests import has_pandas_3

cftime = pytest.importorskip("cftime")

Expand Down Expand Up @@ -54,6 +59,20 @@
]


def has_tick_resample_freq(freqs):
resample_freq, _ = freqs
resample_freq_as_offset = to_offset(resample_freq)
return isinstance(resample_freq_as_offset, CFTIME_TICKS)


def has_non_tick_resample_freq(freqs):
return not has_tick_resample_freq(freqs)


FREQS_WITH_TICK_RESAMPLE_FREQ = list(filter(has_tick_resample_freq, FREQS))
FREQS_WITH_NON_TICK_RESAMPLE_FREQ = list(filter(has_non_tick_resample_freq, FREQS))


def compare_against_pandas(
da_datetimeindex,
da_cftimeindex,
Expand Down Expand Up @@ -110,22 +129,14 @@ def da(index) -> xr.DataArray:
)


@pytest.mark.parametrize("freqs", FREQS, ids=lambda x: "{}->{}".format(*x))
@pytest.mark.parametrize(
"freqs", FREQS_WITH_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x)
)
@pytest.mark.parametrize("closed", [None, "left", "right"])
@pytest.mark.parametrize("label", [None, "left", "right"])
@pytest.mark.parametrize("offset", [None, "5s"], ids=lambda x: f"{x}")
def test_resample(freqs, closed, label, offset) -> None:
def test_resample_with_tick_resample_freq(freqs, closed, label, offset) -> None:
initial_freq, resample_freq = freqs
if (
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since our minimum version of pandas is greater than or equal to 2.2, I took the opportunity to remove this old test-skipping logic.

resample_freq == "4001D"
and closed == "right"
and Version(pd.__version__) < Version("2.2")
):
pytest.skip(
"Pandas fixed a bug in this test case in version 2.2, which we "
"ported to xarray, so this test no longer produces the same "
"result as pandas for earlier pandas versions."
)
start = "2000-01-01T12:07:01"
origin = "start"

Expand All @@ -149,6 +160,43 @@ def test_resample(freqs, closed, label, offset) -> None:
)


@pytest.mark.parametrize(
"freqs", FREQS_WITH_NON_TICK_RESAMPLE_FREQ, ids=lambda x: "{}->{}".format(*x)
)
@pytest.mark.parametrize("closed", [None, "left", "right"])
@pytest.mark.parametrize("label", [None, "left", "right"])
def test_resample_with_non_tick_resample_freq(freqs, closed, label) -> None:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the offset and origin options are not relevant when resampling to a non-Tick frequency, I split these tests off into their own function to avoid warnings and avoid unnecessary duplication of tests.

initial_freq, resample_freq = freqs
resample_freq_as_offset = to_offset(resample_freq)
if isinstance(resample_freq_as_offset, Day) and not has_pandas_3:
pytest.skip("Only valid for pandas >= 3.0")
start = "2000-01-01T12:07:01"

# Set offset and origin to their default values since they have no effect
# on resampling data with a non-tick resample frequency.
offset = None
origin = "start_day"

datetime_index = pd.date_range(
start=start, periods=5, freq=_new_to_legacy_freq(initial_freq)
)
cftime_index = xr.date_range(
start=start, periods=5, freq=initial_freq, use_cftime=True
)
da_datetimeindex = da(datetime_index)
da_cftimeindex = da(cftime_index)

compare_against_pandas(
da_datetimeindex,
da_cftimeindex,
resample_freq,
closed=closed,
label=label,
offset=offset,
origin=origin,
)


@pytest.mark.parametrize(
("freq", "expected"),
[
Expand Down Expand Up @@ -228,7 +276,7 @@ def test_invalid_offset_error(offset: str | int) -> None:
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
da_cftime = da(cftime_index)
with pytest.raises(ValueError, match="offset must be"):
da_cftime.resample(time="2D", offset=offset) # type: ignore[arg-type]
da_cftime.resample(time="2h", offset=offset) # type: ignore[arg-type]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched to "2h", since "2D" is no longer a Tick-like frequency.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a major break. I've certainly used "D" for resampling to a daily frequency, and almost certainly used "5D" or "7D". I think we should do a deprecation cycle.



def test_timedelta_offset() -> None:
Expand All @@ -238,6 +286,15 @@ def test_timedelta_offset() -> None:
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
da_cftime = da(cftime_index)

timedelta_result = da_cftime.resample(time="2D", offset=timedelta).mean()
string_result = da_cftime.resample(time="2D", offset=string).mean()
timedelta_result = da_cftime.resample(time="2h", offset=timedelta).mean()
string_result = da_cftime.resample(time="2h", offset=string).mean()
Comment on lines +289 to +290
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched to "2h", since "2D" is no longer a Tick-like frequency.

xr.testing.assert_identical(timedelta_result, string_result)


@pytest.mark.parametrize(("option", "value"), [("offset", "5s"), ("origin", "start")])
def test_non_tick_option_warning(option, value) -> None:
cftime_index = xr.date_range("2000", periods=5, use_cftime=True)
da_cftime = da(cftime_index)
kwargs = {option: value}
with pytest.warns(RuntimeWarning, match=option):
da_cftime.resample(time="ME", **kwargs)
Loading