Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 83 additions & 67 deletions movement/io/save_poses.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,45 @@
def _ds_to_dlc_style_df(
ds: xr.Dataset, columns: pd.MultiIndex
) -> pd.DataFrame:
"""Convert a ``movement`` dataset to a DeepLabCut-style DataFrame.

Parameters
----------
ds : xarray.Dataset
``movement`` dataset containing pose tracks, confidence scores,
and associated metadata.
columns : pandas.MultiIndex
DeepLabCut-style multi-index columns

Returns
-------
pandas.DataFrame

"""
# Concatenate the pose tracks and confidence scores into one array
"""Convert a ``movement`` dataset to a DLC-style DataFrame."""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You seem to have (inadvertently?) deleted a big part of the docstring. The docstring should be restored to its previous state.

# Check shapes of position and confidence data
position_shape = ds.position.data.shape
confidence_shape = ds.confidence.data.shape
print("Position shape:", position_shape)
print("Confidence shape:", confidence_shape)
Comment on lines +23 to +24
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Print statements can be useful for debugging but we don't want them in the final code. Please remove these and all other print statements you've introduced.


# Concatenate the pose tracks and confi scores into one array
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why change this comment?

tracks_with_scores = np.concatenate(
(
ds.position.data,
ds.confidence.data[:, np.newaxis, ...],
),
axis=1,
)

# Check the shape after concatenation
print("Tracks with scores shape:", tracks_with_scores.shape)

# Reverse the order of the dimensions except for the time dimension
transpose_order = [0] + list(range(tracks_with_scores.ndim - 1, 0, -1))
tracks_with_scores = tracks_with_scores.transpose(transpose_order)

# Check the shape of the data
expected_columns = columns.shape[0]
actual_shape = tracks_with_scores.reshape(ds.sizes["time"], -1).shape[1]

if actual_shape != expected_columns:
raise ValueError(f"""Shape of passed values is {actual_shape},
but indices imply {expected_columns}.""")
Comment on lines +42 to +48
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this additional check was introduced here. Also, we prefer to log our error messages, as described in the contributing guide here: https://movement.neuroinformatics.dev/community/contributing.html#logging


# Create DataFrame with multi-index columns
df = pd.DataFrame(
data=tracks_with_scores.reshape(ds.sizes["time"], -1),
index=np.arange(ds.sizes["time"], dtype=int),
columns=columns,
dtype=float,
)

return df


Expand Down Expand Up @@ -77,7 +83,9 @@ def _save_dlc_df(filepath: Path, df: pd.DataFrame) -> None:


def to_dlc_style_df(
ds: xr.Dataset, split_individuals: bool = False
ds: xr.Dataset,
split_individuals: bool = False,
dlc_df_format: Literal["single-animal", "multi-animal"] = "multi-animal",
) -> pd.DataFrame | dict[str, pd.DataFrame]:
"""Convert a ``movement`` dataset to DeepLabCut-style DataFrame(s).

Expand All @@ -89,28 +97,18 @@ def to_dlc_style_df(
split_individuals : bool, optional
If True, return a dictionary of DataFrames per individual, with
individual names as keys. If False (default), return a single
DataFrame for all individuals (see Notes).
DataFrame for all individuals.
dlc_df_format : {"single-animal", "multi-animal"}, optional
Specifies the DLC dataframe format. "single-animal" produces the
older format (<DLC 2.0) without the "individuals" column level,
while "multi-animal" includes it (DLC >=2.0).
Defaults to "multi-animal".
Comment on lines 97 to +105
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's nice that we can now specify the DLC dataframe format independently of splitting, but there is one combination which doesn't make sense: datasets with a single individual are incompatible with split_individuals=True, i.e. "splitting" doesn't make sense if there's nothing to split. If a user passes split_individuals=True for a single-animal dataset, we should treat split_individuals as False and raise a warning to the user, with warnings.warn.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case should be caught in the code and also mentioned in the docstring.


Returns
-------
pandas.DataFrame or dict
DeepLabCut-style pandas DataFrame or dictionary of DataFrames.

Notes
-----
The DataFrame(s) will have a multi-index column with the following levels:
"scorer", "bodyparts", "coords" (if split_individuals is True),
or "scorer", "individuals", "bodyparts", "coords"
(if split_individuals is False).

Regardless of the provenance of the points-wise confidence scores,
they will be referred to as "likelihood", and stored in
the "coords" level (as DeepLabCut expects).

See Also
--------
to_dlc_file : Save dataset directly to a DeepLabCut-style .h5 or .csv file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You seem to have deleted the docstring sections for "Notes" and "See Also". They should be brought back and the Notes adjusted according to the new implementation.

"""
_validate_dataset(ds)
scorer = ["movement"]
Expand All @@ -125,34 +123,52 @@ def to_dlc_style_df(
individual_data = ds.sel(individuals=individual)

index_levels = ["scorer", "bodyparts", "coords"]
if dlc_df_format == "multi-animal":
index_levels.insert(1, "individuals")

columns = pd.MultiIndex.from_product(
[scorer, bodyparts, coords], names=index_levels
[scorer]
+ ([individuals] if dlc_df_format == "multi-animal" else [])
+ [bodyparts, coords],
names=index_levels,
)

df = _ds_to_dlc_style_df(individual_data, columns)
df_dict[individual] = df

logger.info(
"Converted poses dataset to DeepLabCut-style DataFrames "
"per individual."
f"""Converted poses dataset to DeepLabCut-style DataFrames
per individual using '{dlc_df_format}' format."""
)
return df_dict
else:
index_levels = ["scorer", "individuals", "bodyparts", "coords"]
index_levels = (
["scorer", "individuals", "bodyparts", "coords"]
if dlc_df_format == "multi-animal"
else ["scorer", "bodyparts", "coords"]
)

columns = pd.MultiIndex.from_product(
[scorer, individuals, bodyparts, coords], names=index_levels
[scorer]
+ ([individuals] if dlc_df_format == "multi-animal" else [])
+ [bodyparts, coords],
names=index_levels,
)

df_all = _ds_to_dlc_style_df(ds, columns)

logger.info("Converted poses dataset to DeepLabCut-style DataFrame.")
logger.info(
f"""Converted poses dataset to DeepLabCut-style
DataFrame using '{dlc_df_format}' format."""
)
return df_all


def to_dlc_file(
ds: xr.Dataset,
file_path: str | Path,
split_individuals: bool | Literal["auto"] = "auto",
dlc_df_format: Literal["single-animal", "multi-animal"] = "multi-animal",
) -> None:
"""Save a ``movement`` dataset to DeepLabCut file(s).

Expand All @@ -165,38 +181,27 @@ def to_dlc_file(
Path to the file to save the poses to. The file extension
must be either .h5 (recommended) or .csv.
split_individuals : bool or "auto", optional
Whether to save individuals to separate files or to the same file
(see Notes). Defaults to "auto".
Whether to save individuals to separate files or to the same file.
Defaults to "auto" (determined based on dataset individuals).
dlc_df_format : {"single-animal", "multi-animal"}, optional
Specifies the DLC dataframe format. "single-animal" produces the
older format (<DLC 2.0) without the "individuals" column level,
while "multi-animal" includes it (DLC >=2.0).
Defaults to "multi-animal".

Notes
-----
If ``split_individuals`` is True, each individual will be saved to a
separate file, formatted as in a single-animal DeepLabCut project
(without the "individuals" column level). The individual's name will be
appended to the file path, just before the file extension, e.g.
"/path/to/filename_individual1.h5". If False, all individuals will be
saved to the same file, formatted as in a multi-animal DeepLabCut project
(with the "individuals" column level). The file path will not be modified.
If "auto", the argument's value is determined based on the number of
individuals in the dataset: True if there is only one, False otherwise.

See Also
--------
to_dlc_style_df : Convert dataset to DeepLabCut-style DataFrame(s).

Examples
--------
>>> from movement.io import save_poses, load_poses
>>> ds = load_poses.from_sleap_file("/path/to/file_sleap.analysis.h5")
>>> save_poses.to_dlc_file(ds, "/path/to/file_dlc.h5")
Comment on lines 183 to -191
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here, too, you've deleted the See Also and Examples sections; they need to be brought back.

separate file, but the DLC format is determined by ``dlc_df_format``.
If False, all individuals will be saved to the same file, also formatted
according to ``dlc_df_format``.

""" # noqa: D301
file = _validate_file_path(file_path, expected_suffix=[".csv", ".h5"])

# Sets default behaviour for the function
# Determine splitting behavior
if split_individuals == "auto":
split_individuals = _auto_split_individuals(ds)

elif not isinstance(split_individuals, bool):
raise logger.error(
ValueError(
Expand All @@ -205,19 +210,30 @@ def to_dlc_file(
)
)

# Validate DLC format
if dlc_df_format not in ["single-animal", "multi-animal"]:
raise log_error(
ValueError,
f"""Invalid value for 'dlc_df_format': {dlc_df_format}.
Expected 'single-animal' or 'multi-animal'.""",
)
Comment on lines +214 to +219
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


if split_individuals:
# split the dataset into a dictionary of dataframes per individual
df_dict = to_dlc_style_df(ds, split_individuals=True)
# Split dataset into multiple files while maintaining DLC format
df_dict = to_dlc_style_df(
ds, split_individuals=True, dlc_df_format=dlc_df_format
)

for key, df in df_dict.items():
# the key is the individual's name
filepath = f"{file.path.with_suffix('')}_{key}{file.path.suffix}"
if isinstance(df, pd.DataFrame):
_save_dlc_df(Path(filepath), df)
logger.info(f"Saved poses for individual {key} to {file.path}.")
logger.info(f"Saved poses for individual {key} to {filepath}.")
else:
# convert the dataset to a single dataframe for all individuals
df_all = to_dlc_style_df(ds, split_individuals=False)
# Convert dataset to a single dataframe using the chosen DLC format
df_all = to_dlc_style_df(
ds, split_individuals=False, dlc_df_format=dlc_df_format
)
if isinstance(df_all, pd.DataFrame):
_save_dlc_df(file.path, df_all)
logger.info(f"Saved poses dataset to {file.path}.")
Expand Down
Loading