Finish sketching out major APIs

znicholls · znicholls · commit 56ef4578a574 · 2025-01-22T11:20:02.000+01:00
diff --git a/docs/tutorials/pandas_accessor_tutorial.py b/docs/tutorials/pandas_accessor_tutorial.py
@@ -374,53 +374,304 @@ def create_df(
 )
 
 # %%
-plumes_over = ["run"]
-increase_resolution = 100
+from itertools import cycle
+
+import matplotlib.lines as mlines
+import matplotlib.patches as mpatches
+
+fig, ax = plt.subplots()
+in_ts = small_ts.loc[pix.isin(variable="variable_0")]
+quantile_over = "run"
+pre_calculated = False
+# Each entry is (quantiles, alpha): one quantile plots a line, two a plume.
 quantiles_plumes = (
-    (0.5, 0.8),
+    ((0.5,), 0.8),
+    ((0.25, 0.75), 0.75),
     ((0.05, 0.95), 0.5),
 )
+hue_var = "scenario"
+hue_var_label = None
+style_var = "variable"
+style_var_label = None
+palette = None
+dashes = None
+observed = True
+increase_resolution = 100
+linewidth = 2
+
+# The joy of plotting, you create everything yourself.
+# TODO: split creation from use?
+if hue_var_label is None:
+    hue_var_label = hue_var.capitalize()
+if style_var_label is None:
+    style_var_label = style_var.capitalize()
+
+# Collect each requested quantile once, in first-seen order.
+quantiles = []
+for q_def, _ in quantiles_plumes:
+    try:
+        q_vals = list(q_def)
+    except TypeError:
+        q_vals = [q_def]
+    quantiles.extend(q for q in dict.fromkeys(q_vals) if q not in quantiles)
+
+_palette = {} if palette is None else palette
+
+if dashes is None:
+    _dashes = {}
+    lines = ["-", "--", "-.", ":"]
+    linestyle_cycler = cycle(lines)
+else:
+    _dashes = dashes
+
+# Need to keep track of this, just in case we end up plotting only plumes
+_plotted_lines = False
+
+quantile_labels = {}  # legend label -> first artist plotted with that label
+plotted_hues = []
+plotted_styles = []
+units_l = []
+for q, alpha in quantiles_plumes:  # one pass per (quantiles, alpha) definition
+    for hue_value, hue_ts in in_ts.groupby(hue_var, observed=observed):
+        for style_value, hue_style_ts in hue_ts.groupby(style_var, observed=observed):
+            # Remake in inner loop to avoid leaking between plots
+            pkwargs = {"alpha": alpha}
+
+            if pre_calculated:
+                # Should add some checks here
+                raise NotImplementedError()
+                # Maybe something like the below
+                # missing_quantile = False
+                # for qt in q:
+                #     if qt not in quantiles:
+                #         warnings.warn(
+                #             f"Quantile {qt} not available for {hue_value=} {style_value=}"
+                #         )
+                #         missing_quantile = True
+
+                # if missing_quantile:
+                #     continue
+            else:
+                _pdf = (  # quantiles for this (hue, style) group only
+                    hue_style_ts.ct.to_df(increase_resolution=increase_resolution)
+                    .ct.groupby_except(quantile_over)
+                    .quantile(quantiles)
+                    .ct.fix_index_name_after_groupby_quantile()
+                )
 
-fig, ax = plt.subplots()
-for scenario, s_ts in small_ts.loc[pix.isin(variable="variable_0")].groupby(
-    "scenario", observed=True
-):
-    for quantiles, alpha in quantiles_plumes:
-        s_quants = (
-            s_ts.ct.to_df(increase_resolution=increase_resolution)
-            .groupby(small_ts.index.names.difference(plumes_over), observed=True)
-            .quantile(quantiles)
-        )
-        if isinstance(quantiles, tuple):
-            ax.fill_between(
-                s_quants.columns.values.squeeze(),
-                # As long as there are only two rows,
-                # doesn't matter which way around you do this.
-                s_quants.iloc[0, :].values.squeeze(),
-                s_quants.iloc[1, :].values.squeeze(),
-                alpha=alpha,
-                # label=scenario,
-            )
-        else:
-            ax.plot(
-                s_quants.columns.values.squeeze(),
-                s_quants.values.squeeze(),
-                alpha=alpha,
-                label=scenario,
+            if hue_value not in plotted_hues:
+                plotted_hues.append(hue_value)
+
+            x_vals = _pdf.columns.values.squeeze()
+            # Require ur for this to work
+            # x_vals = get_plot_vals(
+            #     self.time_axis.bounds,
+            #     "self.time_axis.bounds",
+            #     warn_if_magnitudes=warn_if_plotting_magnitudes,
+            # )
+
+            if palette is not None:
+                try:
+                    pkwargs["color"] = _palette[hue_value]
+                except KeyError as exc:
+                    error_msg = f"{hue_value} not in palette. {palette=}"
+                    raise KeyError(error_msg) from exc
+            elif hue_value in _palette:
+                pkwargs["color"] = _palette[hue_value]
+            # else:
+            #     # Let matplotlib default cycling do its thing
+
+            n_q_for_plume = 2
+            plot_plume = len(q) == n_q_for_plume
+            plot_line = len(q) == 1
+
+            if plot_plume:
+                label = f"{q[0] * 100:.0f}th - {q[1] * 100:.0f}th"
+
+                y_lower_vals = _pdf.loc[pix.ismatch(quantile=q[0])].values.squeeze()
+                y_upper_vals = _pdf.loc[pix.ismatch(quantile=q[1])].values.squeeze()
+                # Require ur for this to work
+                # Also need the 1D check back in too
+                # y_lower_vals = get_plot_vals(
+                #     self.time_axis.bounds,
+                #     "self.time_axis.bounds",
+                #     warn_if_magnitudes=warn_if_plotting_magnitudes,
+                # )
+                p = ax.fill_between(
+                    x_vals,
+                    y_lower_vals,
+                    y_upper_vals,
+                    label=label,
+                    **pkwargs,
+                )
+
+                if palette is None:
+                    _palette[hue_value] = p.get_facecolor()[0]
+
+            elif plot_line:
+                if style_value not in plotted_styles:
+                    plotted_styles.append(style_value)
+
+                _plotted_lines = True
+
+                if dashes is not None:
+                    try:
+                        pkwargs["linestyle"] = _dashes[style_value]
+                    except KeyError as exc:
+                        error_msg = f"{style_value} not in dashes. {dashes=}"
+                        raise KeyError(error_msg) from exc
+                else:
+                    if style_value not in _dashes:
+                        _dashes[style_value] = next(linestyle_cycler)
+
+                    pkwargs["linestyle"] = _dashes[style_value]
+
+                if isinstance(q[0], str):
+                    label = q[0]
+                else:
+                    label = f"{q[0] * 100:.0f}th"
+
+                y_vals = _pdf.loc[pix.ismatch(quantile=q[0])].values.squeeze()
+                # Require ur for this to work
+                # Also need the 1D check back in too
+                # y_vals = get_plot_vals(
+                #     self.time_axis.bounds,
+                #     "self.time_axis.bounds",
+                #     warn_if_magnitudes=warn_if_plotting_magnitudes,
+                # )
+                p = ax.plot(
+                    x_vals,
+                    y_vals,
+                    label=label,
+                    linewidth=linewidth,
+                    **pkwargs,
+                )[0]
+
+                if dashes is None:
+                    _dashes[style_value] = p.get_linestyle()
+
+                if palette is None:
+                    _palette[hue_value] = p.get_color()
+
+            else:
+                msg = f"quantiles to plot must be of length one or two, received: {q}"
+                raise ValueError(msg)
+
+            if label not in quantile_labels:
+                quantile_labels[label] = p
+
+            # Once we have unit handling with matplotlib, we can remove this
+            # (and if matplotlib isn't set up, we just don't do unit handling)
+            units_l.extend(_pdf.pix.unique("units").unique().tolist())
+
+    # Fake the line handles for the legend (NOTE: redone on every `q` pass)
+    hue_val_lines = [
+        mlines.Line2D([0], [0], color=_palette[hue_value], label=hue_value)
+        for hue_value in plotted_hues
+    ]
+
+    legend_items = [
+        mpatches.Patch(alpha=0, label="Quantiles"),
+        *quantile_labels.values(),
+        mpatches.Patch(alpha=0, label=hue_var_label),
+        *hue_val_lines,
+    ]
+
+    if _plotted_lines:
+        style_val_lines = [
+            mlines.Line2D(
+                [0],
+                [0],
+                linestyle=_dashes[style_value],
+                label=style_value,
+                color="gray",
+                linewidth=linewidth,
             )
+            for style_value in plotted_styles
+        ]
+        legend_items += [
+            mpatches.Patch(alpha=0, label=style_var_label),
+            *style_val_lines,
+        ]
+    elif dashes is not None:
+        warnings.warn(
+            "`dashes` was passed but no lines were plotted, the style settings "
+            "will not be used"
+        )
 
-ax.legend()
+    ax.legend(handles=legend_items, loc="best")
+
+    if len(set(units_l)) == 1:
+        ax.set_ylabel(units_l[0])
+
+    # return ax, legend_items
+
+
+quantiles
 
 # %%
-(
+demo_q = (
     small_ts.ct.to_df(increase_resolution=5)
-    .groupby(small_ts.index.names.difference(["run"]), observed=True)
+    .ct.groupby_except("run")  # group by every index level except "run"
     .quantile([0.05, 0.5, 0.95])
+    .ct.fix_index_name_after_groupby_quantile()  # name the unnamed level "quantile"
+)
+demo_q
+
+# %%
+units_col = "units"
+indf = demo_q
+out_l = []
+
+# The 'shortcut': a single locator/target pair (presumably matching all rows)
+target_units = "Gt / yr"
+locs_target_units = ((pix.ismatch(**{units_col: "**"}), target_units),)
+locs_target_units = (  # NOTE: overrides the 'shortcut' definition above
+    (pix.ismatch(scenario="scenario_2"), "Gt / yr"),
+    (pix.ismatch(scenario="scenario_0"), "kt / yr"),
+    (
+        demo_q.index.get_level_values("scenario").isin(["scenario_1"])
+        & demo_q.index.get_level_values("variable").isin(["variable_1"]),
+        "t / yr",
+    ),
 )
+# locs_target_units = (
+#     (pix.ismatch(scenario="*"), "t / yr"),
+# )
+
+# Union of all locators seen, so the untouched rows can be selected afterwards
+converted = None
+for locator, target_unit in locs_target_units:
+    converted = locator if converted is None else converted | locator
+
+    # Bind `target_unit` as a default so the helper does not depend on the
+    # loop variable's value at call time.
+    def _convert_unit(idf: pd.DataFrame, to_unit: str = target_unit) -> pd.DataFrame:
+        start_units = idf.pix.unique(units_col).tolist()
+        if len(start_units) > 1:
+            msg = f"{start_units=}"
+            raise AssertionError(msg)
+
+        conversion_factor = UR.Quantity(1, start_units[0]).to(to_unit).m
+
+        return (idf * conversion_factor).pix.assign(**{units_col: to_unit})
+
+    out_l.append(
+        indf.loc[locator]
+        .groupby(units_col, observed=True, group_keys=False)
+        .apply(_convert_unit)
+    )
+
+# Rows matched by no locator are carried over unconverted.
+out = pix.concat([*out_l, indf.loc[~converted]])
+if isinstance(indf.index.dtypes[units_col], pd.CategoricalDtype):
+    # Make sure that units stay as a category, if it started as one.
+    out = out.reset_index(units_col)
+    out[units_col] = out[units_col].astype("category")
+    out = out.set_index(units_col, append=True).reorder_levels(indf.index.names)
+
+out
 
 # %% [markdown]
-# - plot with basic control over labels
-# - plot with grouping and plumes for ranges (basically reproduce scmdata API)
 # - convert with more fine-grained control over interpolation
 #   (e.g. interpolation being passed as pd.Series)
-# - unit conversion
diff --git a/src/continuous_timeseries/pandas_accessors.py b/src/continuous_timeseries/pandas_accessors.py
@@ -218,6 +218,16 @@ def differentiate(
 
         return res
 
+    def groupby_except(
+        self, non_groupers: str | list[str], observed: bool = True
+    ) -> pd.core.groupby.generic.SeriesGroupBy:
+        """Group the series by every index level except `non_groupers`."""
+        # Accept a single level name as shorthand for a one-element list.
+        excluded = [non_groupers] if isinstance(non_groupers, str) else non_groupers
+        groupers = self._series.index.names.difference(excluded)
+
+        return self._series.groupby(groupers, observed=observed)
+
     def plot(
         self,
         label: str | tuple[str, ...] | None = None,
@@ -341,6 +351,9 @@ def get_timeseries_parallel_helper(
 
     res = getattr(df, meth_to_call)(
         # TODO: make this injectable too
+        # This will also allow us to introduce an extra layer
+        # to handle the case when interpolation is a Series,
+        # rather than the same across all rows.
         Timeseries.from_pandas_series,
         axis="columns",
         interpolation=interpolation,
@@ -421,6 +434,23 @@ def to_timeseries(  # noqa: PLR0913
 
         return res
 
+    def groupby_except(
+        self, non_groupers: str | list[str], observed: bool = True
+    ) -> pd.core.groupby.generic.DataFrameGroupBy:
+        """Group the frame by every index level except `non_groupers`."""
+        # Accept a single level name as shorthand for a one-element list.
+        excluded = [non_groupers] if isinstance(non_groupers, str) else non_groupers
+        groupers = self._df.index.names.difference(excluded)
+
+        return self._df.groupby(groupers, observed=observed)
+
+    def fix_index_name_after_groupby_quantile(self) -> pd.DataFrame:
+        """Return a copy whose unnamed index level is named ``"quantile"``."""
+        # TODO: think about doing in place
+        res = self._df.copy()
+        res.index = self._df.index.rename({None: "quantile"})
+        return res
+
 
 def register_pandas_accessor(namespace: str = "ct") -> None:
     """