diff --git a/datashuttle/configs/canonical_tags.py b/datashuttle/configs/canonical_tags.py index ee7295665..25af6b085 100644 --- a/datashuttle/configs/canonical_tags.py +++ b/datashuttle/configs/canonical_tags.py @@ -9,5 +9,24 @@ def tags(tag_name: str) -> str: "datetime": "@DATETIME@", "to": "@TO@", "*": "@*@", + "DATETO": "@DATETO@", + "TIMETO": "@TIMETO@", + "DATETIMETO": "@DATETIMETO@", } return tags[tag_name] + + +def get_datetime_formats() -> dict: + """Get all datetime format strings. + + Returns + ------- + dict + A dictionary containing format strings for datetime, time, and date + + """ + return { + "datetime": "%Y%m%dT%H%M%S", + "time": "%H%M%S", + "date": "%Y%m%d", + } diff --git a/datashuttle/utils/data_transfer.py b/datashuttle/utils/data_transfer.py index 4129bb712..7eb56dffe 100644 --- a/datashuttle/utils/data_transfer.py +++ b/datashuttle/utils/data_transfer.py @@ -464,7 +464,7 @@ def get_processed_names( processed_names = formatting.check_and_format_names( names_checked, prefix ) - processed_names = folders.search_for_wildcards( + processed_names = folders.search_with_tags( self.__cfg, self.__base_folder, self.__local_or_central, diff --git a/datashuttle/utils/folders.py b/datashuttle/utils/folders.py index bf3e6c973..9470dc1bc 100644 --- a/datashuttle/utils/folders.py +++ b/datashuttle/utils/folders.py @@ -6,9 +6,11 @@ Any, Dict, List, + Literal, Optional, Tuple, Union, + overload, ) if TYPE_CHECKING: @@ -18,11 +20,14 @@ from datashuttle.utils.custom_types import TopLevelFolder import fnmatch +import re +from datetime import datetime from pathlib import Path from datashuttle.configs import canonical_folders, canonical_tags from datashuttle.utils import rclone, utils, validation from datashuttle.utils.custom_exceptions import NeuroBlueprintError +from datashuttle.utils.utils import get_values_from_bids_formatted_name # ----------------------------------------------------------------------------- # Create Folders @@ -71,6 +76,12 @@ def 
create_folder_trees( "datatype is deprecated in 0.6.0" ) + # Initialize all_paths with required keys + all_paths: dict = { + "sub": [], + "ses": [], + } + if datatype_passed: error_message = validation.check_datatypes_are_valid( datatype, allow_all=True @@ -78,13 +89,6 @@ def create_folder_trees( if error_message: utils.log_and_raise_error(error_message, NeuroBlueprintError) - all_paths: Dict = {} - else: - all_paths = { - "sub": [], - "ses": [], - } - for sub in sub_names: sub_path = cfg.build_project_path( "local", @@ -260,7 +264,7 @@ def search_project_for_sub_or_ses_names( """ # Search local and central for folders that begin with "sub-*" - local_foldernames, _ = search_sub_or_ses_level( + local_foldernames, _ = search_sub_or_ses_level( # type: ignore cfg, cfg.get_base_folder("local", top_level_folder), "local", @@ -273,7 +277,7 @@ def search_project_for_sub_or_ses_names( central_foldernames: List if include_central: - central_foldernames, _ = search_sub_or_ses_level( + central_foldernames, _ = search_sub_or_ses_level( # type: ignore cfg, cfg.get_base_folder("central", top_level_folder), "central", @@ -401,25 +405,69 @@ def process_glob_to_find_datatype_folders( return zip(ses_folder_keys, ses_folder_values) +# ----------------------------------------------------------------------------- # Wildcards # ----------------------------------------------------------------------------- -def search_for_wildcards( +def search_with_tags( cfg: Configs, base_folder: Path, local_or_central: str, all_names: List[str], sub: Optional[str] = None, ) -> List[str]: - """Handle wildcard flag in upload or download. + """Handle wildcard and datetime range searching in names during upload or download. + + There are two types of special patterns that can be used in names: + 1. Wildcards: Names containing @*@ will be replaced with "*" for glob pattern matching + 2. 
Datetime ranges: Names containing @DATETO@, @TIMETO@, or @DATETIMETO@ will be used + to filter folders within a specific datetime range + + For datetime ranges, the format must be: + - date: YYYYMMDD@DATETO@YYYYMMDD (e.g., "20240101@DATETO@20241231") + - time: HHMMSS@TIMETO@HHMMSS (e.g., "000000@TIMETO@235959") + - datetime: YYYYMMDDTHHMMSS@DATETIMETO@YYYYMMDDTHHMMSS + + Parameters + ---------- + cfg + datashuttle project configuration + base_folder + folder to search for wildcards in + local_or_central + "local" or "central" project path to search in + all_names + list of names that may contain wildcards or datetime ranges. If sub is + passed, these are treated as session names. If sub is None, they are + treated as subject names + sub + optional subject to search for sessions in. If not provided, + will search for subjects rather than sessions - All names in name are searched for @*@ string, and replaced - with single * for glob syntax. If sub is passed, it is - assumes all_names is ses_names and the sub folder is searched - for ses_names matching the name including wildcard. Otherwise, - if sub is None it is assumed all_names are sub names and - the level above is searched. + Returns + ------- + List[str] + A list of matched folder names after wildcard expansion and datetime filtering. + For datetime ranges, only folders with timestamps within the specified range + will be included. + + Examples + -------- + Wildcards: + >>> search_with_tags(cfg, path, "local", ["sub-@*@"]) + ["sub-001", "sub-002", "sub-003"] + + Date range: + >>> search_with_tags( + ... cfg, path, "local", ["sub-001_20240101@DATETO@20241231_id-*"] + ... 
) + ["sub-001_date-20240315_id-1", "sub-001_date-20240401_id-2"] + + Time range: + >>> search_with_tags(cfg, path, "local", ["sub-002_000000@TIMETO@120000"]) + ["sub-002_time-083000", "sub-002_time-113000"] + Parameters ---------- @@ -453,31 +501,326 @@ def search_for_wildcards( but where @*@-containing names have been replaced with search results. + + """ new_all_names: List[str] = [] + for name in all_names: + if not ( + canonical_tags.tags("*") in name + or canonical_tags.tags("DATETO") in name + or canonical_tags.tags("TIMETO") in name + or canonical_tags.tags("DATETIMETO") in name + ): + # If no special tags, just add the name as is + new_all_names.append(name) + continue + + # Handle wildcard replacement first if present + search_str = name if canonical_tags.tags("*") in name: - name = name.replace(canonical_tags.tags("*"), "*") + search_str = search_str.replace(canonical_tags.tags("*"), "*") + + # Handle datetime ranges + format_type = None + tag = None + + if canonical_tags.tags("DATETO") in search_str: + tag = canonical_tags.tags("DATETO") + format_type = "date" + elif canonical_tags.tags("TIMETO") in search_str: + tag = canonical_tags.tags("TIMETO") + format_type = "time" + elif canonical_tags.tags("DATETIMETO") in search_str: + tag = canonical_tags.tags("DATETIMETO") + format_type = "datetime" + + if format_type is not None: + assert tag is not None + search_str = format_and_validate_datetime_search_str( + search_str, format_type, tag + ) - matching_names: List[str] + # Use the helper function to perform the glob search if sub: - matching_names = search_sub_or_ses_level( # type: ignore - cfg, base_folder, local_or_central, sub, search_str=name + matching_names = search_sub_or_ses_level( + cfg, + base_folder, + local_or_central, + sub, + search_str=search_str, + )[0] else: - matching_names = search_sub_or_ses_level( # type: ignore - cfg, base_folder, local_or_central, search_str=name + matching_names = search_sub_or_ses_level( 
+ cfg, base_folder, local_or_central, search_str=search_str )[0] - new_all_names += matching_names + # Filter results by datetime range + start_timepoint, end_timepoint = ( + strip_start_end_date_from_datetime_tag(name, format_type, tag) + ) + matching_names = filter_names_by_datetime_range( + matching_names, format_type, start_timepoint, end_timepoint + ) + new_all_names.extend(matching_names) else: - new_all_names += [name] + # No datetime range, just perform the glob search with wildcards + if sub: + matching_names = search_sub_or_ses_level( + cfg, + base_folder, + local_or_central, + sub, + search_str=search_str, + )[0] + else: + matching_names = search_sub_or_ses_level( + cfg, base_folder, local_or_central, search_str=search_str + )[0] + new_all_names.extend(matching_names) - new_all_names = list( - set(new_all_names) - ) # remove duplicate names in case of wildcard overlap + return list(set(new_all_names)) # Remove duplicates - return new_all_names + +def filter_names_by_datetime_range( + names: List[str], + format_type: str, + start_timepoint: datetime, + end_timepoint: datetime, +) -> List[str]: + """Filter a list of names based on a datetime range. + + Assumes all names contain the format_type pattern (e.g., date-*, time-*) + as they were searched using this pattern. 
+ + Parameters + ---------- + names + List of names to filter, all containing the datetime pattern + format_type + One of "datetime", "time", or "date" + start_timepoint + Start of the datetime range + end_timepoint + End of the datetime range + + Returns + ------- + List[str] + Filtered list of names that fall within the datetime range + + Raises + ------ + ValueError + If any datetime value does not match the expected ISO format + + """ + filtered_names: List[str] = [] + for candidate in names: + candidate_basename = ( + candidate if isinstance(candidate, str) else candidate.name + ) + value = get_values_from_bids_formatted_name( + [candidate_basename], format_type + )[0] + + try: + candidate_timepoint = datetime_object_from_string( + value, format_type + ) + except ValueError: + utils.log_and_raise_error( + f"Invalid {format_type} format in name {candidate_basename}. " + f"Expected ISO format: {canonical_tags.get_datetime_formats()[format_type]}", + ValueError, + ) + + if start_timepoint <= candidate_timepoint <= end_timepoint: + filtered_names.append(candidate) + + return filtered_names + + +# ----------------------------------------------------------------------------- +# Datetime Tag Functions +# ----------------------------------------------------------------------------- + + +def get_expected_datetime_len(format_type: str) -> int: + """Get the expected length of characters for a datetime format. + + Parameters + ---------- + format_type + One of "datetime", "time", or "date" + + Returns + ------- + int + The number of characters expected for the format + + """ + format_str = canonical_tags.get_datetime_formats()[format_type] + today = datetime.now() + return len(today.strftime(format_str)) + + +def find_datetime_in_name( + name: str, format_type: str, tag: str +) -> tuple[str | Any, ...] | None: + """Find and extract datetime values from a name using a regex pattern. + + Parameters + ---------- + name + The name containing the datetime range + e.g. 
"sub-001_20240101@DATETO@20250101_id-*" + format_type + One of "datetime", "time", or "date" + tag + The tag used for the range (e.g. @DATETO@) + + Returns + ------- + tuple[str, str] | None + A tuple containing (start_datetime_str, end_datetime_str) if found, + None if no match is found + + """ + expected_len = get_expected_datetime_len(format_type) + full_tag_regex = ( + rf"(\d{{{expected_len}}}){re.escape(tag)}(\d{{{expected_len}}})" + ) + match = re.search(full_tag_regex, name) + return match.groups() if match else None + + +def strip_start_end_date_from_datetime_tag( + search_str: str, format_type: str, tag: str +) -> tuple[datetime, datetime]: + """Extract and validate start and end datetime values from a search string. + + Parameters + ---------- + search_str + The search string containing the datetime range + e.g. "sub-001_20240101T000000@DATETIMETO@20250101T235959" + format_type + One of "datetime", "time", or "date" + tag + The tag used for the range (e.g. @DATETIMETO@) + + Returns + ------- + tuple[datetime, datetime] + A tuple containing (start_timepoint, end_timepoint) + + Raises + ------ + NeuroBlueprintError + If the datetime format is invalid, the range is malformed, + or end datetime is before start datetime + + """ + expected_len = get_expected_datetime_len(format_type) + full_tag_regex = ( + rf"(\d{{{expected_len}}}){re.escape(tag)}(\d{{{expected_len}}})" + ) + match = re.search(full_tag_regex, search_str) + + if not match: + utils.log_and_raise_error( + f"Invalid {format_type} range format in search string: {search_str}. 
Ensure the format matches the expected pattern: {canonical_tags.get_datetime_formats()[format_type]}.", + NeuroBlueprintError, + ) + + assert match is not None, "type narrow `match`" + start_str, end_str = match.groups() + + try: + start_timepoint = datetime_object_from_string(start_str, format_type) + end_timepoint = datetime_object_from_string(end_str, format_type) + except ValueError as e: + utils.log_and_raise_error( + f"Invalid {format_type} format in search string: {search_str}. Error: {str(e)}", + NeuroBlueprintError, + ) + + if end_timepoint < start_timepoint: + utils.log_and_raise_error( + f"End {format_type} is before start {format_type}. Ensure the end datetime is after the start datetime.", + NeuroBlueprintError, + ) + + return start_timepoint, end_timepoint + + +def format_and_validate_datetime_search_str( + search_str: str, format_type: str, tag: str +) -> str: + """Validate and format a search string containing a datetime range. + + Parameters + ---------- + search_str + The search string containing the datetime range + e.g. "sub-001_20240101@DATETO@20250101_id-*" or "sub-002_000000@TIMETO@235959" + format_type + One of "datetime", "time", or "date" + tag + The tag used for the range (e.g. @DATETO@) + + Returns + ------- + str + The formatted search string with datetime range replaced + e.g. 
"sub-001_date-*_id-*" or "sub-002_time-*" + + Raises + ------ + NeuroBlueprintError + If the datetime format is invalid or the range is malformed + + """ + # Validate the datetime range format + strip_start_end_date_from_datetime_tag(search_str, format_type, tag) + + # Replace datetime range with wildcard pattern + expected_len = get_expected_datetime_len(format_type) + full_tag_regex = ( + rf"(\d{{{expected_len}}}){re.escape(tag)}(\d{{{expected_len}}})" + ) + return re.sub(full_tag_regex, f"{format_type}-*", search_str) + + +def datetime_object_from_string( + datetime_string: str, format_type: str +) -> datetime: + """Convert a datetime string to a datetime object using the appropriate format. + + Parameters + ---------- + datetime_string : + The string to convert to a datetime object + + format_type : + One of "datetime", "time", or "date" + + Returns + ------- + datetime + The parsed datetime object + + Raises + ------ + ValueError + If the string cannot be parsed using the specified format + + """ + return datetime.strptime( + datetime_string, canonical_tags.get_datetime_formats()[format_type] + ) # ----------------------------------------------------------------------------- @@ -485,7 +828,32 @@ def search_for_wildcards( # ----------------------------------------------------------------------------- -# @overload: Cannot get type overloading to work with this function. +@overload +def search_sub_or_ses_level( + cfg: Configs, + base_folder: Path, + local_or_central: str, + sub: Optional[str] = ..., + ses: Optional[str] = ..., + search_str: str = ..., + verbose: bool = ..., + return_full_path: Literal[False] = ..., +) -> Tuple[List[str], List[str]]: ... + + +@overload +def search_sub_or_ses_level( + cfg: Configs, + base_folder: Path, + local_or_central: str, + sub: Optional[str] = ..., + ses: Optional[str] = ..., + search_str: str = ..., + verbose: bool = ..., + return_full_path: Literal[True] = ..., +) -> Tuple[List[Path], List[str]]: ... 
+ + def search_sub_or_ses_level( cfg: Configs, base_folder: Path, @@ -495,7 +863,7 @@ def search_sub_or_ses_level( search_str: str = "*", verbose: bool = True, return_full_path: bool = False, -) -> Tuple[List[str] | List[Path], List[str]]: +) -> Tuple[Union[List[str], List[Path]], List[str]]: """Search project folder at the subject or session level. Parameters diff --git a/datashuttle/utils/formatting.py b/datashuttle/utils/formatting.py index 0c77cddcf..a0677f6a6 100644 --- a/datashuttle/utils/formatting.py +++ b/datashuttle/utils/formatting.py @@ -63,7 +63,13 @@ def check_and_format_names( names_to_format, reserved_keywords = [], [] for name in names: - if name in canonical_reserved_keywords() or tags("*") in name: + if ( + name in canonical_reserved_keywords() + or tags("*") in name + or tags("DATETO") in name + or tags("TIMETO") in name + or tags("DATETIMETO") in name + ): if tags("to") in name: # handle an edge case where use searches with both tags reserved_keywords += update_names_with_range_to_flag( diff --git a/datashuttle/utils/validation.py b/datashuttle/utils/validation.py index 59ad5fbe2..072e098c3 100644 --- a/datashuttle/utils/validation.py +++ b/datashuttle/utils/validation.py @@ -24,7 +24,11 @@ from itertools import chain from pathlib import Path -from datashuttle.configs import canonical_configs, canonical_folders +from datashuttle.configs import ( + canonical_configs, + canonical_folders, + canonical_tags, +) from datashuttle.utils import formatting, getters, utils from datashuttle.utils.custom_exceptions import NeuroBlueprintError @@ -384,8 +388,8 @@ def replace_tags_in_regexp(regexp: str) -> str: """ regexp_list = [regexp] - date_regexp = r"\d\d\d\d\d\d\d\d" - time_regexp = r"\d\d\d\d\d\d" + date_regexp = r"\d{8}" + time_regexp = r"\d{6}" formatting.replace_date_time_tags_in_name( regexp_list, @@ -559,18 +563,12 @@ def datetime_are_iso_format( A list of validation errors. 
""" - formats = { - "datetime": "%Y%m%dT%H%M%S", - "time": "%H%M%S", - "date": "%Y%m%d", - } - - key = next((key for key in formats if key in name), None) + datetime_keys = list(canonical_tags.get_datetime_formats().keys()) + key = next((key for key in datetime_keys if f"_{key}-" in name), None) error_message: List[str] if not key: error_message = [] - else: try: format_to_check = utils.get_values_from_bids_formatted_name( @@ -579,17 +577,48 @@ def datetime_are_iso_format( except: return [] - strfmt = formats[key] - - try: - datetime.strptime(format_to_check, strfmt) + if datetime_value_str_is_iso_format(format_to_check, key): error_message = [] - except ValueError: - error_message = [get_datetime_error(key, name, strfmt, path_)] + else: + error_message = [ + get_datetime_error( + key, + name, + canonical_tags.get_datetime_formats()[key], + path_, + ) + ] return error_message +def datetime_value_str_is_iso_format( + datetime_str: str, format_type: str +) -> bool: + """Validate that a datetime string matches the expected ISO format. 
+ + Parameters + ---------- + datetime_str : str + The datetime string to validate + format_type : str + One of "datetime", "time", or "date" + + Returns + ------- + bool + True if the string matches the ISO format, False otherwise + + """ + try: + datetime.strptime( + datetime_str, canonical_tags.get_datetime_formats()[format_type] + ) + return True + except ValueError: + return False + + def raise_display_mode( message: str, display_mode: DisplayMode, log: bool ) -> None: diff --git a/pyproject.toml b/pyproject.toml index 2e9402411..3d5561b77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,10 +77,9 @@ requires = [ ] build-backend = "setuptools.build_meta" -[tool.mypy] -exclude = [ - "tests/" -] +[[tool.mypy.overrides]] +module = "tests.*" +ignore_errors = true [tool.setuptools] include-package-data = true diff --git a/tests/test_date_search_range.py b/tests/test_date_search_range.py new file mode 100644 index 000000000..bd2f3a917 --- /dev/null +++ b/tests/test_date_search_range.py @@ -0,0 +1,453 @@ +import os +import shutil + +import pytest + +from datashuttle.configs import canonical_tags + +from . 
import test_utils +from .base import BaseTest + + +class TestDateSearchRange(BaseTest): + """Test date/time range search functionality with real datashuttle projects.""" + + def test_simple_wildcard_first(self, project): + """Test basic wildcard functionality before testing date ranges.""" + subs = ["sub-001", "sub-002"] + sessions = ["ses-001", "ses-002"] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=[f"sub-{canonical_tags.tags('*')}"], + ses_names=[f"ses-{canonical_tags.tags('*')}"], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" + transferred_subs = [sub.name for sub in central_path.glob("sub-*")] + + expected_subs = ["sub-001", "sub-002"] + assert sorted(transferred_subs) == sorted(expected_subs) + + for sub_name in expected_subs: + sub_path = central_path / sub_name + transferred_sessions = [ses.name for ses in sub_path.glob("ses-*")] + expected_sessions = ["ses-001", "ses-002"] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_date_range_transfer(self, project): + """Test that date range patterns correctly filter folders during transfer.""" + subs = ["sub-001", "sub-002"] + sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240315", + "ses-003_date-20240401", + "ses-004_date-20240415", + "ses-005_date-20240501", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True, "ephys": True}) + test_utils.make_and_check_local_project_folders( + project, + "rawdata", + subs, + sessions, + ["behav", "ephys"], + datatypes_used, + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240315{canonical_tags.tags('DATETO')}20240401" + ], + 
datatype=["behav", "ephys"], + ) + + central_path = project.get_central_path() / "rawdata" + transferred_subs = list(central_path.glob("sub-*")) + + assert len(transferred_subs) == 2 + + for sub_path in transferred_subs: + transferred_sessions = [ses.name for ses in sub_path.glob("ses-*")] + expected_sessions = [ + "ses-002_date-20240315", + "ses-003_date-20240401", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_time_range_transfer(self, project): + """Test that time range patterns work correctly.""" + subs = ["sub-001"] + sessions = [ + "ses-001_time-080000", + "ses-002_time-120000", + "ses-003_time-160000", + "ses-004_time-200000", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_100000{canonical_tags.tags('TIMETO')}180000" + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" / "sub-001" + transferred_sessions = [ses.name for ses in central_path.glob("ses-*")] + + expected_sessions = ["ses-002_time-120000", "ses-003_time-160000"] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_datetime_range_transfer(self, project): + """Test that wildcard matching works with datetime-tagged sessions.""" + subs = ["sub-001"] + sessions = [ + "ses-001_datetime-20240301T080000", + "ses-002_datetime-20240315T120000", + "ses-003_datetime-20240401T160000", + "ses-004_datetime-20240415T200000", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + 
ses_names=[ + f"ses-{canonical_tags.tags('*')}_datetime-20240315{canonical_tags.tags('*')}", + f"ses-{canonical_tags.tags('*')}_datetime-20240401{canonical_tags.tags('*')}", + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" / "sub-001" + transferred_sessions = [ses.name for ses in central_path.glob("ses-*")] + + expected_sessions = [ + "ses-002_datetime-20240315T120000", + "ses-003_datetime-20240401T160000", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_combined_wildcard_and_date_range(self, project): + """Test combining wildcards with date ranges.""" + subs = ["sub-001", "sub-002", "sub-003"] + sessions = [ + "ses-001_date-20240301_run-01", + "ses-002_date-20240315_run-02", + "ses-003_date-20240401_run-01", + "ses-004_date-20240415_run-03", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=[f"sub-{canonical_tags.tags('*')}"], + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240310{canonical_tags.tags('DATETO')}20240420_run-01", + f"ses-{canonical_tags.tags('*')}_20240310{canonical_tags.tags('DATETO')}20240420_run-02", + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" + transferred_subs = list(central_path.glob("sub-*")) + + assert len(transferred_subs) == 3 + + for sub_path in transferred_subs: + transferred_sessions = [ses.name for ses in sub_path.glob("ses-*")] + expected_sessions = [ + "ses-002_date-20240315_run-02", + "ses-003_date-20240401_run-01", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_invalid_date_range_errors(self, project): + """Test that invalid date ranges raise appropriate errors.""" + subs = ["sub-001"] + sessions = ["ses-001_date-20240301"] + + 
datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + with pytest.raises(Exception) as exc_info: + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240401{canonical_tags.tags('DATETO')}20240301" + ], + datatype=["behav"], + ) + assert "before start" in str(exc_info.value) + + with pytest.raises(Exception) as exc_info: + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_2024030{canonical_tags.tags('DATETO')}20240401" + ], + datatype=["behav"], + ) + assert "Invalid" in str(exc_info.value) + + def test_no_matches_in_date_range(self, project): + """Test behavior when no folders match the date range.""" + subs = ["sub-001"] + sessions = [ + "ses-001_date-20240101", + "ses-002_date-20240201", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240301{canonical_tags.tags('DATETO')}20240401" + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" + transferred_items = list(central_path.glob("*")) + + if transferred_items: + transferred_sub_names = [ + item.name + for item in transferred_items + if item.name.startswith("sub-") + ] + assert len(transferred_sub_names) == 0 + + def test_subject_level_date_range(self, project): + """Test date ranges work at the subject level too.""" + subs = [ + "sub-001_date-20240301", + "sub-002_date-20240315", + "sub-003_date-20240401", + "sub-004_date-20240415", + ] + sessions = ["ses-001"] + + datatypes_used = 
test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=[ + f"sub-{canonical_tags.tags('*')}_20240310{canonical_tags.tags('DATETO')}20240410" + ], + ses_names=sessions, + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" + transferred_subs = [sub.name for sub in central_path.glob("sub-*")] + + expected_subs = ["sub-002_date-20240315", "sub-003_date-20240401"] + assert sorted(transferred_subs) == sorted(expected_subs) + + @pytest.mark.parametrize("project", ["full"], indirect=True) + def test_download_with_date_range(self, project): + """Test that date range patterns work for downloads as well as uploads.""" + subs = ["sub-001", "sub-002"] + sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240315", + "ses-003_date-20240401", + "ses-004_date-20240415", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=sessions, + datatype=["behav"], + ) + + os.chdir(project.get_local_path()) + local_rawdata = project.get_local_path() / "rawdata" + if local_rawdata.exists(): + shutil.rmtree(local_rawdata) + + project.download_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240310{canonical_tags.tags('DATETO')}20240401" + ], + datatype=["behav"], + ) + + local_path = project.get_local_path() / "rawdata" + downloaded_subs = list(local_path.glob("sub-*")) + + assert len(downloaded_subs) == 2 + + for sub_path in downloaded_subs: + downloaded_sessions = [ses.name for ses in sub_path.glob("ses-*")] + expected_sessions = [ + "ses-002_date-20240315", + 
"ses-003_date-20240401", + ] + assert sorted(downloaded_sessions) == sorted(expected_sessions) + + def test_edge_case_exact_boundary_dates(self, project): + """Test that boundary dates are handled correctly (inclusive ranges).""" + subs = ["sub-001"] + sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240315", + "ses-003_date-20240401", + "ses-004_date-20240415", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-{canonical_tags.tags('*')}_20240301{canonical_tags.tags('DATETO')}20240401" + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" / "sub-001" + transferred_sessions = [ses.name for ses in central_path.glob("ses-*")] + + expected_sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240315", + "ses-003_date-20240401", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_with_range_to_flag(self, project): + """Test that the @DATETO@ works well with @TO@""" + subs = ["sub-001"] + + sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240301", + "ses-003_date-20240405", + "ses-004_date-20240415", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + # Select such that ses-002 onwards is selected, and + # ses-004 is excluded based on date. 
+ project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-002@TO@004_20240301{canonical_tags.tags('DATETO')}20240406" + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" / "sub-001" + transferred_sessions = [ses.name for ses in central_path.glob("ses-*")] + + expected_sessions = [ + "ses-002_date-20240301", + "ses-003_date-20240405", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions) + + def test_without_wildcard_ses(self, project): + """Test without wildcard ses. + + Including @*@ only led to an uncaught bug as it was triggering a + conditional in `check_and_format_names` that was not triggered by + @DATETO@ alone though it should have been. + """ + subs = ["sub-001"] + + sessions = [ + "ses-001_date-20240301", + "ses-002_date-20240301", + "ses-003_date-20240405", + "ses-004_date-20240415", + ] + + datatypes_used = test_utils.get_all_broad_folders_used(value=False) + datatypes_used.update({"behav": True}) + test_utils.make_and_check_local_project_folders( + project, "rawdata", subs, sessions, ["behav"], datatypes_used + ) + + # Select such that ses-002 is selected (and it is in range) + project.upload_custom( + "rawdata", + sub_names=subs, + ses_names=[ + f"ses-002_20240301{canonical_tags.tags('DATETO')}20240302" + ], + datatype=["behav"], + ) + + central_path = project.get_central_path() / "rawdata" / "sub-001" + transferred_sessions = [ses.name for ses in central_path.glob("ses-*")] + + expected_sessions = [ + "ses-002_date-20240301", + ] + assert sorted(transferred_sessions) == sorted(expected_sessions)