diff --git a/.github/actions/load-data/action.yml b/.github/actions/load-data/action.yml index 514840e..b78fe20 100644 --- a/.github/actions/load-data/action.yml +++ b/.github/actions/load-data/action.yml @@ -13,9 +13,10 @@ runs: rclone_config: ${{ inputs.rclone-config }} - name: Get dataset version hash + id: hash shell: bash run: | - HASH=$(rclone lsl remote:"SampleData" --drive-shared-with-me) + HASH=$(rclone lsjson remote:"SampleData" --drive-shared-with-me --recursive | sed 's/,$//' | sort | sha256sum | cut -d' ' -f1) echo "DATASET_HASH=$HASH" >> $GITHUB_OUTPUT - name: Cache datasets @@ -23,7 +24,7 @@ runs: id: cache-datasets with: path: ./testing_data - key: ephys-datasets-${{ steps.ephys.outputs.DATASET_HASH }} + key: ${{ steps.hash.outputs.DATASET_HASH }} enableCrossOsArchive: true - if: ${{ steps.cache-datasets.outputs.cache-hit != 'true' }} diff --git a/.github/workflows/all_os_versions.txt b/.github/workflows/all_os_versions.txt new file mode 100644 index 0000000..1bcf5cd --- /dev/null +++ b/.github/workflows/all_os_versions.txt @@ -0,0 +1 @@ +["ubuntu-latest", "macos-latest", "windows-2022"] \ No newline at end of file diff --git a/.github/workflows/all_python_versions.txt b/.github/workflows/all_python_versions.txt new file mode 100644 index 0000000..350c415 --- /dev/null +++ b/.github/workflows/all_python_versions.txt @@ -0,0 +1 @@ +["3.10", "3.11", "3.12", "3.13"] \ No newline at end of file diff --git a/.github/workflows/pr-tests.yml b/.github/workflows/pr-tests.yml index cf703c7..97ee0db 100644 --- a/.github/workflows/pr-tests.yml +++ b/.github/workflows/pr-tests.yml @@ -10,4 +10,43 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true \ No newline at end of file + cancel-in-progress: true + +jobs: + load_python_and_os_versions: + runs-on: ubuntu-latest + outputs: + ALL_PYTHON_VERSIONS: ${{ steps.load_python_versions.outputs.python_versions }} + ALL_OS_VERSIONS: ${{ steps.load_os_versions.outputs.os_versions }} + steps: + - uses: actions/checkout@v4 + - id: load_python_versions + run: echo "python_versions=$(cat ./.github/workflows/all_python_versions.txt)" >> "$GITHUB_OUTPUT" + - id: load_os_versions + run: echo "os_versions=$(cat ./.github/workflows/all_os_versions.txt)" >> "$GITHUB_OUTPUT" + - name: Debugging + run: | + echo "Loaded Python versions: ${{ steps.load_python_versions.outputs.python_versions }}" + echo "Loaded OS versions: ${{ steps.load_os_versions.outputs.os_versions }}" + + run-tests: + needs: [load_python_and_os_versions] + uses: ./.github/workflows/run-tests.yml + secrets: + RCLONE_CONFIG: ${{ secrets.RCLONE_CONFIG }} + with: # Ternary operator: condition && value_if_true || value_if_false + python-versions: ${{ github.event.pull_request.draft == true && '["3.10"]' || needs.load_python_and_os_versions.outputs.ALL_PYTHON_VERSIONS }} + os-versions: ${{ github.event.pull_request.draft == true && '["ubuntu-latest"]' || needs.load_python_and_os_versions.outputs.ALL_OS_VERSIONS }} + + check-final-status: + name: All tests passing + if: always() + needs: + - run-tests + runs-on: ubuntu-latest + steps: + - name: Decide whether all jobs succeeded or at least one failed + uses: re-actors/alls-green@release/v1 + with: + allowed-skips: run-tests + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 5fd96d4..e00d8ee 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -11,4 +11,50 @@ on: description: 'List of OS versions to use in 
matrix, as JSON string' required: true type: string - workflow_dispatch: \ No newline at end of file + secrets: + RCLONE_CONFIG: + required: true + workflow_dispatch: + inputs: + python-versions: + description: 'List of Python versions to use in matrix, as JSON string' + required: true + type: string + default: '["3.10", "3.11", "3.12", "3.13"]' + os-versions: + description: 'List of OS versions to use in matrix, as JSON string' + required: true + type: string + default: '["ubuntu-latest", "windows-2022", "macos-latest"]' + +jobs: + run: + name: ${{ matrix.os }} Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ${{ fromJson(inputs.python-versions) }} + os: ${{ fromJson(inputs.os-versions) }} + steps: + - uses: actions/checkout@v5 + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install pip + run: python -m pip install -U pip # Official recommended way + + - name: Install GuPPy with testing requirements + run: | + python -m pip install "." + python -m pip install --group test + + - name: Prepare data for tests + uses: ./.github/actions/load-data + with: + rclone-config: ${{ secrets.RCLONE_CONFIG }} + + - name: Run tests + run: pytest tests -vv -rsx # -n auto --dist loadscope # TODO: re-enable parallel execution when logging issues with Windows are resolved diff --git a/.gitignore b/.gitignore index de0bef9..0628429 100755 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,6 @@ z-score_methods.tgn GuPPy/runFiberPhotometryAnalysis.ipynb .vscode/ *.egg-info/ +.clinerules/ + +testing_data/ diff --git a/pyproject.toml b/pyproject.toml index aa0a3c9..822137f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,13 @@ dependencies = [ "tables", ] +[dependency-groups] +test = [ + "pytest", + "pytest-cov", + "pytest-xdist" # Runs tests on parallel +] + [project.scripts] guppy = "guppy.main:main" diff --git a/src/guppy/preprocess.py b/src/guppy/preprocess.py index 14926f3..e9cb5b8 100755 --- a/src/guppy/preprocess.py +++ b/src/guppy/preprocess.py @@ -17,7 +17,10 @@ from matplotlib.widgets import MultiCursor from pathlib import Path from .combineDataFn import processTimestampsForCombiningData -plt.switch_backend('TKAgg') + +# Only set matplotlib backend if not in CI environment +if not os.getenv('CI'): + plt.switch_backend('TKAgg') def takeOnlyDirs(paths): removePaths = [] diff --git a/src/guppy/saveStoresList.py b/src/guppy/saveStoresList.py index 5d11fc2..dc89b87 100755 --- a/src/guppy/saveStoresList.py +++ b/src/guppy/saveStoresList.py @@ -133,6 +133,16 @@ def saveStorenames(inputParameters, data, event_name, flag, filepath): # getting input parameters inputParameters = inputParameters + # Headless path: if storenames_map provided, write storesList.csv without building the Panel UI + storenames_map = inputParameters.get("storenames_map") + if isinstance(storenames_map, dict) and len(storenames_map) > 0: + op = make_dir(filepath) + arr = np.asarray([list(storenames_map.keys()), list(storenames_map.values())], dtype=str) + np.savetxt(os.path.join(op, 'storesList.csv'), arr, delimiter=",", fmt='%s') + insertLog(f"Storeslist file saved at {op}", logging.INFO) + insertLog('Storeslist : \n'+str(arr), logging.INFO) + return + # reading storenames from the data fetched using 'readtsq' function if isinstance(data, pd.DataFrame): data['name'] = np.asarray(data['name'], dtype=str) @@ -583,7 +593,7 @@ def check_channels(state): return 
unique_state.shape[0], unique_state # function to decide NPM timestamps unit (seconds, ms or us) -def decide_ts_unit_for_npm(df): +def decide_ts_unit_for_npm(df, timestamp_column_name=None, time_unit=None, headless=False): col_names = np.array(list(df.columns)) col_names_ts = [''] for name in col_names: @@ -592,6 +602,18 @@ def decide_ts_unit_for_npm(df): ts_unit = 'seconds' if len(col_names_ts)>2: + # Headless path: auto-select column/unit without any UI + if headless: + if timestamp_column_name is not None: + assert timestamp_column_name in col_names_ts, f"Provided timestamp_column_name '{timestamp_column_name}' not found in columns {col_names_ts[1:]}" + chosen = timestamp_column_name + else: + chosen = col_names_ts[1] + df.insert(1, 'Timestamp', df[chosen]) + df = df.drop(col_names_ts[1:], axis=1) + valid_units = {'seconds', 'milliseconds', 'microseconds'} + ts_unit = time_unit if (isinstance(time_unit, str) and time_unit in valid_units) else 'seconds' + return df, ts_unit #def comboBoxSelected(event): # print(event.widget.get()) @@ -741,10 +763,19 @@ def read_doric(filepath): # and recognize type of 'csv' files either from # Neurophotometrics, Doric systems or custom made 'csv' files # and read data accordingly -def import_np_doric_csv(filepath, isosbestic_control, num_ch): +def import_np_doric_csv(filepath, isosbestic_control, num_ch, inputParameters=None): insertLog("If it exists, importing either NPM or Doric or csv file based on the structure of file", logging.DEBUG) + # Headless configuration (used to avoid any UI prompts when running tests) + headless = bool(os.environ.get('GUPPY_BASE_DIR')) + npm_timestamp_column_name = None + npm_time_unit = None + npm_split_events = None + if isinstance(inputParameters, dict): + npm_timestamp_column_name = inputParameters.get('npm_timestamp_column_name') + npm_time_unit = inputParameters.get('npm_time_unit', 'seconds') + npm_split_events = inputParameters.get('npm_split_events', True) path = sorted(glob.glob(os.path.join(filepath, '*.csv'))) + \ sorted(glob.glob(os.path.join(filepath, '*.doric'))) path_chev = glob.glob(os.path.join(filepath, '*chev*')) @@ -879,16 +910,19 @@ def import_np_doric_csv(filepath, isosbestic_control, num_ch): elif flag=='event_np': type_val = np.array(df.iloc[:,1]) type_val_unique = np.unique(type_val) - window = tk.Tk() - if len(type_val_unique)>1: - response = messagebox.askyesno('Multiple event TTLs', 'Based on the TTL file,\ + if headless: + response = 1 if bool(npm_split_events) else 0 + else: + window = tk.Tk() + if len(type_val_unique)>1: + response = messagebox.askyesno('Multiple event TTLs', 'Based on the TTL file,\ it looks like TTLs \ belongs to multipe behavior type. 
\ Do you want to create multiple files for each \ behavior type ?') - else: - response = 0 - window.destroy() + else: + response = 0 + window.destroy() if response==1: timestamps = np.array(df.iloc[:,0]) for j in range(len(type_val_unique)): @@ -907,7 +941,12 @@ def import_np_doric_csv(filepath, isosbestic_control, num_ch): event_from_filename.append('event'+str(0)) else: file = f'file{str(i)}_' - df, ts_unit = decide_ts_unit_for_npm(df) + df, ts_unit = decide_ts_unit_for_npm( + df, + timestamp_column_name=npm_timestamp_column_name, + time_unit=npm_time_unit, + headless=headless + ) df, indices_dict, num_channels = decide_indices(file, df, flag) keys = list(indices_dict.keys()) for k in range(len(keys)): @@ -1004,7 +1043,7 @@ def execute(inputParameters): for i in folderNames: filepath = os.path.join(inputParameters['abspath'], i) data = readtsq(filepath) - event_name, flag = import_np_doric_csv(filepath, isosbestic_control, num_ch) + event_name, flag = import_np_doric_csv(filepath, isosbestic_control, num_ch, inputParameters=inputParameters) saveStorenames(inputParameters, data, event_name, flag, filepath) insertLog('#'*400, logging.INFO) except Exception as e: diff --git a/src/guppy/savingInputParameters.py b/src/guppy/savingInputParameters.py index 95cd976..4adeebb 100644 --- a/src/guppy/savingInputParameters.py +++ b/src/guppy/savingInputParameters.py @@ -24,23 +24,31 @@ def savingInputParameters(): else: pass - # Create the main window - folder_selection = tk.Tk() - folder_selection.title("Select the folder path where your data is located") - folder_selection.geometry("700x200") - def select_folder(): + # Determine base folder path (headless-friendly via env var) + base_dir_env = os.environ.get('GUPPY_BASE_DIR') + is_headless = base_dir_env and os.path.isdir(base_dir_env) + if is_headless: global folder_path - folder_path = filedialog.askdirectory(title="Select the folder path where your data is located") - if folder_path: - print(f"Folder path set to {folder_path}") - folder_selection.destroy() - else: - folder_path = os.path.expanduser('~') - print(f"Folder path set to {folder_path}") - - select_button = ttk.Button(folder_selection, text="Select a Folder", command=select_folder) - select_button.pack(pady=5) - folder_selection.mainloop() + folder_path = base_dir_env + print(f"Folder path set to {folder_path} (from GUPPY_BASE_DIR)") + else: + # Create the main window + folder_selection = tk.Tk() + folder_selection.title("Select the folder path where your data is located") + folder_selection.geometry("700x200") + def select_folder(): + global folder_path + folder_path = filedialog.askdirectory(title="Select the folder path where your data is located") + if folder_path: + print(f"Folder path set to {folder_path}") + folder_selection.destroy() + else: + folder_path = os.path.expanduser('~') + print(f"Folder path set to {folder_path}") + + select_button = ttk.Button(folder_selection, text="Select a Folder", command=select_folder) + select_button.pack(pady=5) + folder_selection.mainloop() current_dir = os.getcwd() @@ -525,4 +533,13 @@ def onclickpsth(event=None): template.main.append(group) template.main.append(visualize) + # Expose minimal hooks and widgets to enable programmatic testing + template._hooks = { + "onclickProcess": onclickProcess, + "getInputParameters": getInputParameters, + } + template._widgets = { + "files_1": files_1, + } + return template diff --git a/src/guppy/testing/__init__.py b/src/guppy/testing/__init__.py new file mode 100644 index 0000000..17d2b2a --- /dev/null 
+++ b/src/guppy/testing/__init__.py @@ -0,0 +1,8 @@ +from .api import step1, step2, step3, step4 + +__all__ = [ + "step1", + "step2", + "step3", + "step4", +] diff --git a/src/guppy/testing/api.py b/src/guppy/testing/api.py new file mode 100644 index 0000000..bc8b239 --- /dev/null +++ b/src/guppy/testing/api.py @@ -0,0 +1,370 @@ +""" +Python API for GuPPy pipeline steps. + +Step 1: Save Input Parameters +- Writes GuPPyParamtersUsed.json into each selected data folder. +- Mirrors the Panel UI's Step 1 behavior without invoking any UI by default. + +This module is intentionally minimal and non-invasive. +""" + +from __future__ import annotations + +import json +import os +import numpy as np +from typing import Iterable, List + +from guppy.savingInputParameters import savingInputParameters +from guppy.saveStoresList import execute +from guppy.readTevTsq import readRawData +from guppy.preprocess import extractTsAndSignal +from guppy.computePsth import psthForEachStorename +from guppy.findTransientsFreqAndAmp import executeFindFreqAndAmp + + + + + + +def step1(*, base_dir: str, selected_folders: Iterable[str]) -> None: + """ + Run pipeline Step 1 (Save Input Parameters) via the Panel logic. + + This calls the exact ``onclickProcess`` function defined in + ``savingInputParameters()``, in headless mode. The ``GUPPY_BASE_DIR`` + environment variable is used to bypass the Tk folder selection dialog. + The function programmatically sets the FileSelector value to + ``selected_folders`` and triggers the underlying callback that writes + ``GuPPyParamtersUsed.json`` into each selected folder. + + Parameters + ---------- + base_dir : str + Root directory used to initialize the FileSelector. All ``selected_folders`` + must reside under this path. + selected_folders : Iterable[str] + Absolute paths to the session directories to analyze. All must share the + same parent directory. + + Raises + ------ + RuntimeError + If the ``savingInputParameters`` template does not expose the required + testing hooks (``_hooks['onclickProcess']`` and ``_widgets['files_1']``). + """ + os.environ["GUPPY_BASE_DIR"] = base_dir + + # Build the template headlessly + template = savingInputParameters() + + # Sanity checks: ensure hooks/widgets exposed + if not hasattr(template, "_hooks") or "onclickProcess" not in template._hooks: + raise RuntimeError("savingInputParameters did not expose 'onclickProcess' hook") + if not hasattr(template, "_widgets") or "files_1" not in template._widgets: + raise RuntimeError("savingInputParameters did not expose 'files_1' widget") + + # Select folders and trigger actual step-1 logic + template._widgets["files_1"].value = list(selected_folders) + template._hooks["onclickProcess"]() + + +def step2(*, base_dir: str, selected_folders: Iterable[str], storenames_map: dict[str, str], npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True) -> None: + """ + Run pipeline Step 2 (Save Storenames) via the actual Panel-backed logic. + + This builds the Step 2 template headlessly (using ``GUPPY_BASE_DIR`` to bypass + the folder dialog), sets the FileSelector to ``selected_folders``, retrieves + the full input parameters via ``getInputParameters()``, injects the provided + ``storenames_map``, and calls ``execute(inputParameters)`` from + ``guppy.saveStoresList``. The execute() function is minimally augmented to + support a headless branch when ``storenames_map`` is present, while leaving + Panel behavior unchanged. 
+ + Parameters + ---------- + base_dir : str + Root directory used to initialize the FileSelector. All ``selected_folders`` + must reside directly under this path. + selected_folders : Iterable[str] + Absolute paths to the session directories to process. + storenames_map : dict[str, str] + Mapping from raw storenames (e.g., "Dv1A") to semantic names + (e.g., "control_DMS"). Insertion order is preserved. + + Raises + ------ + ValueError + If validation fails (e.g., empty mapping, invalid directories, or parent + mismatch). + RuntimeError + If the template does not expose the required testing hooks/widgets. + """ + # Validate base_dir + if not isinstance(base_dir, str) or not base_dir: + raise ValueError("base_dir must be a non-empty string") + base_dir = os.path.abspath(base_dir) + if not os.path.isdir(base_dir): + raise ValueError(f"base_dir does not exist or is not a directory: {base_dir}") + + # Validate selected_folders + sessions = list(selected_folders or []) + if not sessions: + raise ValueError("selected_folders must be a non-empty iterable of session directories") + abs_sessions = [os.path.abspath(s) for s in sessions] + for s in abs_sessions: + if not os.path.isdir(s): + raise ValueError(f"Session path does not exist or is not a directory: {s}") + parent = os.path.dirname(s) + if parent != base_dir: + raise ValueError( + f"All selected_folders must share the same parent equal to base_dir. " + f"Got parent {parent!r} for session {s!r}, expected {base_dir!r}" + ) + + # Validate storenames_map + if not isinstance(storenames_map, dict) or not storenames_map: + raise ValueError("storenames_map must be a non-empty dict[str, str]") + for k, v in storenames_map.items(): + if not isinstance(k, str) or not k.strip(): + raise ValueError(f"Invalid storename key: {k!r}") + if not isinstance(v, str) or not v.strip(): + raise ValueError(f"Invalid semantic name for key {k!r}: {v!r}") + + # Headless build: set base_dir and construct the template + os.environ["GUPPY_BASE_DIR"] = base_dir + template = savingInputParameters() + + # Ensure hooks/widgets exposed + if not hasattr(template, "_hooks") or "getInputParameters" not in template._hooks: + raise RuntimeError("savingInputParameters did not expose 'getInputParameters' hook") + if not hasattr(template, "_widgets") or "files_1" not in template._widgets: + raise RuntimeError("savingInputParameters did not expose 'files_1' widget") + + # Select folders and fetch input parameters + template._widgets["files_1"].value = abs_sessions + input_params = template._hooks["getInputParameters"]() + + # Inject storenames mapping for headless execution + input_params["storenames_map"] = dict(storenames_map) + + # Add npm parameters + input_params["npm_timestamp_column_name"] = npm_timestamp_column_name + input_params["npm_time_unit"] = npm_time_unit + input_params["npm_split_events"] = npm_split_events + + # Call the underlying Step 2 executor (now headless-aware) + execute(input_params) + + +def step3(*, base_dir: str, selected_folders: Iterable[str], npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True) -> None: + """ + Run pipeline Step 3 (Read Raw Data) via the actual Panel-backed logic, headlessly. 
+ + This builds the template headlessly (using ``GUPPY_BASE_DIR`` to bypass + the folder dialog), sets the FileSelector to ``selected_folders``, retrieves + the full input parameters via ``getInputParameters()``, and calls the + underlying worker ``guppy.readTevTsq.readRawData(input_params)`` that the + UI normally launches via subprocess. No GUI is spawned. + + Parameters + ---------- + base_dir : str + Root directory used to initialize the FileSelector. All ``selected_folders`` + must reside directly under this path. + selected_folders : Iterable[str] + Absolute paths to the session directories to process. + + Raises + ------ + ValueError + If validation fails (e.g., empty iterable, invalid directories, or parent mismatch). + RuntimeError + If the template does not expose the required testing hooks/widgets. + """ + # Validate base_dir + if not isinstance(base_dir, str) or not base_dir: + raise ValueError("base_dir must be a non-empty string") + base_dir = os.path.abspath(base_dir) + if not os.path.isdir(base_dir): + raise ValueError(f"base_dir does not exist or is not a directory: {base_dir}") + + # Validate selected_folders + sessions = list(selected_folders or []) + if not sessions: + raise ValueError("selected_folders must be a non-empty iterable of session directories") + abs_sessions = [os.path.abspath(s) for s in sessions] + for s in abs_sessions: + if not os.path.isdir(s): + raise ValueError(f"Session path does not exist or is not a directory: {s}") + parent = os.path.dirname(s) + if parent != base_dir: + raise ValueError( + f"All selected_folders must share the same parent equal to base_dir. " + f"Got parent {parent!r} for session {s!r}, expected {base_dir!r}" + ) + + # Headless build: set base_dir and construct the template + os.environ["GUPPY_BASE_DIR"] = base_dir + template = savingInputParameters() + + # Ensure hooks/widgets exposed + if not hasattr(template, "_hooks") or "getInputParameters" not in template._hooks: + raise RuntimeError("savingInputParameters did not expose 'getInputParameters' hook") + if not hasattr(template, "_widgets") or "files_1" not in template._widgets: + raise RuntimeError("savingInputParameters did not expose 'files_1' widget") + + # Select folders and fetch input parameters + template._widgets["files_1"].value = abs_sessions + input_params = template._hooks["getInputParameters"]() + + # Inject explicit NPM parameters (match Step 2 style) + input_params["npm_timestamp_column_name"] = npm_timestamp_column_name + input_params["npm_time_unit"] = npm_time_unit + input_params["npm_split_events"] = npm_split_events + + # Call the underlying Step 3 worker directly (no subprocess) + readRawData(input_params) + + +def step4(*, base_dir: str, selected_folders: Iterable[str], npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True) -> None: + """ + Run pipeline Step 4 (Extract timestamps and signal) via the Panel-backed logic, headlessly. + + This builds the template headlessly (using ``GUPPY_BASE_DIR`` to bypass + the folder dialog), sets the FileSelector to ``selected_folders``, retrieves + the full input parameters via ``getInputParameters()``, and calls the + underlying worker ``guppy.preprocess.extractTsAndSignal(input_params)`` that the + UI normally launches via subprocess. No GUI is spawned. + + Parameters + ---------- + base_dir : str + Root directory used to initialize the FileSelector. All ``selected_folders`` + must reside directly under this path. 
+ selected_folders : Iterable[str] + Absolute paths to the session directories to process. + + Raises + ------ + ValueError + If validation fails (e.g., empty iterable, invalid directories, or parent mismatch). + RuntimeError + If the template does not expose the required testing hooks/widgets. + """ + # Validate base_dir + if not isinstance(base_dir, str) or not base_dir: + raise ValueError("base_dir must be a non-empty string") + base_dir = os.path.abspath(base_dir) + if not os.path.isdir(base_dir): + raise ValueError(f"base_dir does not exist or is not a directory: {base_dir}") + + # Validate selected_folders + sessions = list(selected_folders or []) + if not sessions: + raise ValueError("selected_folders must be a non-empty iterable of session directories") + abs_sessions = [os.path.abspath(s) for s in sessions] + for s in abs_sessions: + if not os.path.isdir(s): + raise ValueError(f"Session path does not exist or is not a directory: {s}") + parent = os.path.dirname(s) + if parent != base_dir: + raise ValueError( + f"All selected_folders must share the same parent equal to base_dir. " + f"Got parent {parent!r} for session {s!r}, expected {base_dir!r}" + ) + + # Headless build: set base_dir and construct the template + os.environ["GUPPY_BASE_DIR"] = base_dir + template = savingInputParameters() + + # Ensure hooks/widgets exposed + if not hasattr(template, "_hooks") or "getInputParameters" not in template._hooks: + raise RuntimeError("savingInputParameters did not expose 'getInputParameters' hook") + if not hasattr(template, "_widgets") or "files_1" not in template._widgets: + raise RuntimeError("savingInputParameters did not expose 'files_1' widget") + + # Select folders and fetch input parameters + template._widgets["files_1"].value = abs_sessions + input_params = template._hooks["getInputParameters"]() + + # Inject explicit NPM parameters (match Step 2 style) + input_params["npm_timestamp_column_name"] = npm_timestamp_column_name + input_params["npm_time_unit"] = npm_time_unit + input_params["npm_split_events"] = npm_split_events + + # Call the underlying Step 4 worker directly (no subprocess) + extractTsAndSignal(input_params) + + +def step5(*, base_dir: str, selected_folders: Iterable[str], npm_timestamp_column_name: str | None = None, npm_time_unit: str = "seconds", npm_split_events: bool = True) -> None: + """ + Run pipeline Step 5 (PSTH Computation) via the Panel-backed logic, headlessly. + + This builds the template headlessly (using ``GUPPY_BASE_DIR`` to bypass + the folder dialog), sets the FileSelector to ``selected_folders``, retrieves + the full input parameters via ``getInputParameters()``, and calls the + underlying worker ``guppy.computePsth.psthForEachStorename(input_params)`` that the + UI normally launches via subprocess. No GUI is spawned. + + Parameters + ---------- + base_dir : str + Root directory used to initialize the FileSelector. All ``selected_folders`` + must reside directly under this path. + selected_folders : Iterable[str] + Absolute paths to the session directories to process. + + Raises + ------ + ValueError + If validation fails (e.g., empty iterable, invalid directories, or parent mismatch). + RuntimeError + If the template does not expose the required testing hooks/widgets. 
+ """ + # Validate base_dir + if not isinstance(base_dir, str) or not base_dir: + raise ValueError("base_dir must be a non-empty string") + base_dir = os.path.abspath(base_dir) + if not os.path.isdir(base_dir): + raise ValueError(f"base_dir does not exist or is not a directory: {base_dir}") + + # Validate selected_folders + sessions = list(selected_folders or []) + if not sessions: + raise ValueError("selected_folders must be a non-empty iterable of session directories") + abs_sessions = [os.path.abspath(s) for s in sessions] + for s in abs_sessions: + if not os.path.isdir(s): + raise ValueError(f"Session path does not exist or is not a directory: {s}") + parent = os.path.dirname(s) + if parent != base_dir: + raise ValueError( + f"All selected_folders must share the same parent equal to base_dir. " + f"Got parent {parent!r} for session {s!r}, expected {base_dir!r}" + ) + + # Headless build: set base_dir and construct the template + os.environ["GUPPY_BASE_DIR"] = base_dir + template = savingInputParameters() + + # Ensure hooks/widgets exposed + if not hasattr(template, "_hooks") or "getInputParameters" not in template._hooks: + raise RuntimeError("savingInputParameters did not expose 'getInputParameters' hook") + if not hasattr(template, "_widgets") or "files_1" not in template._widgets: + raise RuntimeError("savingInputParameters did not expose 'files_1' widget") + + # Select folders and fetch input parameters + template._widgets["files_1"].value = abs_sessions + input_params = template._hooks["getInputParameters"]() + + # Inject explicit NPM parameters (match Step 2 style) + input_params["npm_timestamp_column_name"] = npm_timestamp_column_name + input_params["npm_time_unit"] = npm_time_unit + input_params["npm_split_events"] = npm_split_events + + # Call the underlying Step 5 worker directly (no subprocess) + psthForEachStorename(input_params) + + # Also compute frequency/amplitude and transients occurrences (normally triggered by CLI main) + executeFindFreqAndAmp(input_params) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..294c354 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,8 @@ +import os +import sys + +# Ensure the 'src' directory is on sys.path for tests without installation +PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +SRC_PATH = os.path.join(PROJECT_ROOT, "src") +if SRC_PATH not in sys.path: + sys.path.insert(0, SRC_PATH) diff --git a/tests/test_step1.py b/tests/test_step1.py new file mode 100644 index 0000000..a428531 --- /dev/null +++ b/tests/test_step1.py @@ -0,0 +1,98 @@ +import json +import os + +import numpy as np +import pytest + +from guppy.testing.api import step1 + + +@pytest.fixture(scope="function") +def default_parameters(): + return { + "combine_data": False, + "isosbestic_control": True, + "timeForLightsTurnOn": 1, + "filter_window": 100, + "removeArtifacts": False, + "noChannels": 2, + "zscore_method": "standard z-score", + "baselineWindowStart": 0, + "baselineWindowEnd": 0, + "nSecPrev": -10, + "nSecPost": 20, + "timeInterval": 2, + "bin_psth_trials": 0, + "use_time_or_trials": "Time (min)", + "baselineCorrectionStart": -5, + "baselineCorrectionEnd": 0, + "peak_startPoint": [ + -5.0, + 0.0, + 5.0, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan + ], + "peak_endPoint": [ + 0.0, + 3.0, + 10.0, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan, + np.nan + ], + "selectForComputePsth": "z_score", + "selectForTransientsComputation": "z_score", + "moving_window": 
15, + "highAmpFilt": 2, + "transientsThresh": 3 + } + + +def test_step1(tmp_path, default_parameters): + # Arrange: base directory with two sessions under the same parent + session_names = ["session1", "session2"] + base_name = "data_root" + base_dir = tmp_path / base_name + base_dir.mkdir(parents=True, exist_ok=True) + sessions = [] + for name in session_names: + path = base_dir / name + path.mkdir(parents=True, exist_ok=True) + sessions.append(str(path)) + base_dir = str(base_dir) + + # Act: call actual Panel onclickProcess via the API helper (headless) + step1(base_dir=base_dir, selected_folders=sessions) + + # Assert: JSON written for each session with key defaults + for s in sessions: + out_fp = os.path.join(s, "GuPPyParamtersUsed.json") + assert os.path.exists(out_fp), f"Missing file: {out_fp}" + with open(out_fp, "r") as f: + data = json.load(f) + + # Check that JSON data matches default parameters + for key, expected_value in default_parameters.items(): + if isinstance(expected_value, np.ndarray): + np.testing.assert_array_equal(data[key], expected_value) + elif isinstance(expected_value, list) and any(isinstance(x, float) and np.isnan(x) for x in expected_value): + # Handle lists with NaN values + actual = data[key] + assert len(actual) == len(expected_value) + for i, (a, e) in enumerate(zip(actual, expected_value)): + if np.isnan(e): + assert np.isnan(a) or a is None, f"Mismatch at index {i}: expected NaN, got {a}" + else: + assert a == e, f"Mismatch at index {i}: expected {e}, got {a}" + else: + assert data[key] == expected_value, f"Mismatch for {key}: expected {expected_value}, got {data[key]}" diff --git a/tests/test_step2.py b/tests/test_step2.py new file mode 100644 index 0000000..e6815ab --- /dev/null +++ b/tests/test_step2.py @@ -0,0 +1,288 @@ +import csv +import os +import glob +import shutil +from pathlib import Path + +import pytest + +from guppy.testing.api import step2 + + +@pytest.mark.parametrize( + "session_subdir, storenames_map", + [ + ( + "SampleData_csv/sample_data_csv_1", + { + "Sample_Control_Channel": "control_region", + "Sample_Signal_Channel": "signal_region", + "Sample_TTL": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_1", + { + "AIn-1 - Raw": "control_region", + "AIn-2 - Raw": "signal_region", + "DI--O-1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_2", + { + "AIn-1 - Dem (ref)": "control_region", + "AIn-1 - Dem (da)": "signal_region", + "DI/O-1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_3", + { + "CAM1_EXC1/ROI01": "control_region", + "CAM1_EXC2/ROI01": "signal_region", + "DigitalIO/CAM1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_4", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + ), + ( + "SampleData_Doric/sample_doric_5", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + ), + ( + "SampleData_Clean/Photo_63_207-181030-103332", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + ), + ( + "SampleData_with_artifacts/Photo_048_392-200728-121222", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + ), + # TODO: Add sampleData_NPM_1 after fixing Doric vs. NPM determination bug. 
+ ( + "SampleData_Neurophotometrics/sampleData_NPM_2", + { + "file0_chev6": "control_region", + "file1_chev6": "signal_region", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_3", + { + "file0_chev3": "control_region3", + "file0_chod3": "signal_region3", + "event3": "ttl_region3", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_4", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "eventTrue": "ttl_true_region1", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_5", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "event0": "ttl_region1", + }, + ), + ], + ids=[ + "csv_generic", + "sample_doric_1", + "sample_doric_2", + "sample_doric_3", + "sample_doric_4", + "sample_doric_5", + "tdt_clean", + "tdt_with_artifacts", + "sample_npm_2", + "sample_npm_3", + "sample_npm_4", + "sample_npm_5", + ], +) +def test_step2(tmp_path, session_subdir, storenames_map): + """ + Step 2 integration test (Save Storenames) using real sample data, isolated to a temporary workspace. + For each dataset: + - Copies the session into a temp workspace + - Cleans any copied *_output_* artifacts (using a specific glob to avoid non-dirs) + - Calls step2 headlessly with an explicit, deterministic storenames_map + - Asserts storesList.csv exists and exactly matches the provided mapping (2xN) + """ + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": + npm_timestamp_column_name = "ComputerTimestamp" + npm_time_unit = "milliseconds" + else: + npm_timestamp_column_name = None + npm_time_unit = None + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": + npm_split_events = False + else: + npm_split_events = True + + # Source sample data + src_base_dir = str(Path(".") / "testing_data") + src_session = os.path.join(src_base_dir, session_subdir) + if not os.path.isdir(src_session): + pytest.skip(f"Sample data not available at expected path: {src_session}") + + # Stage a clean copy of the session into a temporary workspace + tmp_base = tmp_path / "data_root" + tmp_base.mkdir(parents=True, exist_ok=True) + dest_name = os.path.basename(src_session) + session_copy = tmp_base / dest_name + shutil.copytree(src_session, session_copy) + + # Remove any copied artifacts in the temp session; match only this session's output directory(ies) + for d in glob.glob(os.path.join(session_copy, f"{dest_name}_output_*")): + assert os.path.isdir(d), f"Expected output directory for cleanup, got non-directory: {d}" + shutil.rmtree(d) + + # Remove any copied GuPPyParamtersUsed.json to ensure a fresh run + params_fp = session_copy / "GuPPyParamtersUsed.json" + if params_fp.exists(): + params_fp.unlink() + + # Run Step 2 headlessly using the explicit mapping + step2(base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Validate storesList.csv exists and matches the mapping exactly (order-preserved) + basename = os.path.basename(session_copy) + output_dirs = sorted(glob.glob(os.path.join(session_copy, f"{basename}_output_*"))) + assert output_dirs, f"No output directories found in {session_copy}" + + out_dir = None + for d in output_dirs: + if os.path.exists(os.path.join(d, "storesList.csv")): + out_dir = d + break + assert out_dir is not None, f"No storesList.csv found in any output directory under {session_copy}" + + out_fp = os.path.join(out_dir, "storesList.csv") + 
assert os.path.exists(out_fp), f"Missing storesList.csv: {out_fp}" + + with open(out_fp, newline="") as f: + reader = csv.reader(f) + rows = list(reader) + + assert len(rows) == 2, f"Expected 2 rows (storenames, names_for_storenames), got {len(rows)}" + assert rows[0] == list(storenames_map.keys()), "Row 0 (storenames) mismatch" + assert rows[1] == list(storenames_map.values()), "Row 1 (names_for_storenames) mismatch" + + # Additional NPM assertions: ensure Step 2 created the expected CSV files for Neurophotometrics + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_1": + expected_files = [ + "bl72bl82_12feb2024_fp.csv", + "bl72bl82_12feb2024_stimuli.csv", + "eventAfVf.csv", + "eventAfVm.csv", + "eventAfVn.csv", + "eventAfVu.csv", + "eventAmVf.csv", + "eventAmVm.csv", + "eventAmVn.csv", + "eventAmVu.csv", + "eventAnVf.csv", + "eventAnVm.csv", + "eventAnVu.csv", + "eventAuVf.csv", + "eventAuVm.csv", + "eventAuVn.csv", + "eventAuVu.csv", + "eventblankvideo.csv", + "eventpinknoise.csv", + "eventtone.csv", + "eventwhitenoise.csv", + "file0_chev1.csv", + "file0_chod1.csv", + ] + for rel in expected_files: + fp = os.path.join(session_copy, rel) + assert os.path.exists(fp), f"Missing expected NPM file at Step 2: {fp}" + elif session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_2": + expected_files = [ + "file0_chev1.csv", + "file0_chev2.csv", + "file0_chev3.csv", + "file0_chev4.csv", + "file0_chev5.csv", + "file0_chev6.csv", + "file0_chev7.csv", + "file1_chev1.csv", + "file1_chev2.csv", + "file1_chev3.csv", + "file1_chev4.csv", + "file1_chev5.csv", + "file1_chev6.csv", + "file1_chev7.csv", + "FiberData415.csv", + "FiberData470.csv", + ] + for rel in expected_files: + fp = os.path.join(session_copy, rel) + assert os.path.exists(fp), f"Missing expected NPM file at Step 2: {fp}" + elif session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": + expected_files = [ + "event1.csv", + "event3.csv", + "file0_chev1.csv", + "file0_chev2.csv", + "file0_chev3.csv", + "file0_chod1.csv", + "file0_chod2.csv", + "file0_chod3.csv", + ] + for rel in expected_files: + fp = os.path.join(session_copy, rel) + assert os.path.exists(fp), f"Missing expected NPM file at Step 2: {fp}" + elif session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_4": + expected_files = [ + "eventTrue.csv", + "eventFalse.csv", + "file0_chev1.csv", + "file0_chev2.csv", + "file0_chev3.csv", + "file0_chod1.csv", + "file0_chod2.csv", + "file0_chod3.csv", + ] + for rel in expected_files: + fp = os.path.join(session_copy, rel) + assert os.path.exists(fp), f"Missing expected NPM file at Step 2: {fp}" + elif session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": + expected_files = [ + "event0.csv", + "file0_chev1.csv", + "file0_chev2.csv", + "file0_chev3.csv", + "file0_chod1.csv", + "file0_chod2.csv", + "file0_chod3.csv", + "PagCeAVgatFear_1512_1.csv", + "PagCeAVgatFear_1512_ts0.csv", + ] + for rel in expected_files: + fp = os.path.join(session_copy, rel) + assert os.path.exists(fp), f"Missing expected NPM file at Step 2: {fp}" diff --git a/tests/test_step3.py b/tests/test_step3.py new file mode 100644 index 0000000..ab945f5 --- /dev/null +++ b/tests/test_step3.py @@ -0,0 +1,212 @@ +import os +import csv +import glob +import shutil + +import h5py +import pytest +from pathlib import Path + +from guppy.testing.api import step2, step3 + + +@pytest.fixture(scope="function") +def storenames_map(): + return { + "Sample_Control_Channel": "control_region", + "Sample_Signal_Channel": "signal_region", + 
"Sample_TTL": "ttl", + } + +@pytest.mark.parametrize( + "session_subdir, storenames_map", + [ + ( + "SampleData_csv/sample_data_csv_1", + { + "Sample_Control_Channel": "control_region", + "Sample_Signal_Channel": "signal_region", + "Sample_TTL": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_1", + { + "AIn-1 - Raw": "control_region", + "AIn-2 - Raw": "signal_region", + "DI--O-1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_2", + { + "AIn-1 - Dem (ref)": "control_region", + "AIn-1 - Dem (da)": "signal_region", + "DI/O-1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_3", + { + "CAM1_EXC1/ROI01": "control_region", + "CAM1_EXC2/ROI01": "signal_region", + "DigitalIO/CAM1": "ttl", + }, + ), + ( + "SampleData_Doric/sample_doric_4", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + ), + ( + "SampleData_Doric/sample_doric_5", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + ), + ( + "SampleData_Clean/Photo_63_207-181030-103332", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + ), + ( + "SampleData_with_artifacts/Photo_048_392-200728-121222", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_2", + { + "file0_chev6": "control_region", + "file1_chev6": "signal_region", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_3", + { + "file0_chev3": "control_region3", + "file0_chod3": "signal_region3", + "event3": "ttl_region3", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_4", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "eventTrue": "ttl_true_region1", + }, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_5", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "event0": "ttl_region1", + }, + ), + ], + ids=[ + "csv_generic", + "sample_doric_1", + "sample_doric_2", + "sample_doric_3", + "sample_doric_4", + "sample_doric_5", + "tdt_clean", + "tdt_with_artifacts", + "sample_npm_2", + "sample_npm_3", + "sample_npm_4", + "sample_npm_5", + ], +) +def test_step3(tmp_path, storenames_map, session_subdir): + """ + Full integration test for Step 3 (Read Raw Data) using real CSV sample data, + isolated to a temporary workspace to avoid mutating shared sample data. + + Behavior: + - Copies the SampleData_csv session under GDriveSampleData into tmp_path. + - Cleans any copied artifacts (*_output_* dirs, GuPPyParamtersUsed.json). + - Derives a minimal storenames_map from the copied session and runs Step 2 + to create storesList.csv in the temp copy. + - Runs Step 3 headlessly and verifies per-storename HDF5 outputs exist in + the temp copy (never touching the original sample path). 
+ """ + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": + npm_timestamp_column_name = "ComputerTimestamp" + npm_time_unit = "milliseconds" + else: + npm_timestamp_column_name = None + npm_time_unit = None + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": + npm_split_events = False + else: + npm_split_events = True + + src_base_dir = str(Path(".") / "testing_data") + src_session = os.path.join(src_base_dir, session_subdir) + + if not os.path.isdir(src_session): + pytest.skip(f"Sample data not available at expected path: {src_session}") + + # Stage a clean copy of the session into a temporary workspace + tmp_base = tmp_path / "data_root" + tmp_base.mkdir(parents=True, exist_ok=True) + dest_name = os.path.basename(src_session) + session_copy = tmp_base / dest_name + shutil.copytree(src_session, session_copy) + + # Remove any copied artifacts in the temp session + # Use a specific glob that uniquely matches this session's output directory(ies) + for d in glob.glob(os.path.join(session_copy, f"{dest_name}_output_*")): + assert os.path.isdir(d), f"Expected output directory for cleanup, got non-directory: {d}" + shutil.rmtree(d) + params_fp = session_copy / "GuPPyParamtersUsed.json" + if params_fp.exists(): + params_fp.unlink() + + # Step 2: create storesList.csv in the temp copy + step2(base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 3: read raw data in the temp copy + step3(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Validate outputs exist in the temp copy + basename = os.path.basename(session_copy) + output_dirs = sorted(glob.glob(os.path.join(session_copy, f"{basename}_output_*"))) + assert output_dirs, f"No output directories found in {session_copy}" + out_dir = None + for d in output_dirs: + if os.path.exists(os.path.join(d, "storesList.csv")): + out_dir = d + break + assert out_dir is not None, f"No storesList.csv found in any output directory under {session_copy}" + stores_fp = os.path.join(out_dir, "storesList.csv") + + # Assert: storesList.csv structure is 2xN + with open(stores_fp, newline="") as f: + reader = csv.reader(f) + rows = list(reader) + assert len(rows) == 2, "storesList.csv should be 2 rows (storenames, names_for_storenames)" + storenames = rows[0] + assert storenames, "Expected at least one storename in storesList.csv" + + # For each storename, ensure an HDF5 was produced; minimally check timestamps dataset exists. 
+ for storename in storenames: + safe = storename.replace("\\", "_").replace("/", "_") + h5_path = os.path.join(out_dir, f"{safe}.hdf5") + assert os.path.exists(h5_path), f"Missing HDF5 for storename {storename!r} at {h5_path}" + + with h5py.File(h5_path, "r") as f: + assert "timestamps" in f, "Expected 'timestamps' dataset in HDF5" diff --git a/tests/test_step4.py b/tests/test_step4.py new file mode 100644 index 0000000..ec972d7 --- /dev/null +++ b/tests/test_step4.py @@ -0,0 +1,231 @@ +import os +import glob +import shutil + +import h5py +import pytest +from pathlib import Path + +from guppy.testing.api import step2, step3, step4 + +@pytest.mark.parametrize( + "session_subdir, storenames_map, expected_region, expected_ttl", + [ + ( + "SampleData_csv/sample_data_csv_1", + { + "Sample_Control_Channel": "control_region", + "Sample_Signal_Channel": "signal_region", + "Sample_TTL": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_1", + { + "AIn-1 - Raw": "control_region", + "AIn-2 - Raw": "signal_region", + "DI--O-1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_2", + { + "AIn-1 - Dem (ref)": "control_region", + "AIn-1 - Dem (da)": "signal_region", + "DI/O-1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_3", + { + "CAM1_EXC1/ROI01": "control_region", + "CAM1_EXC2/ROI01": "signal_region", + "DigitalIO/CAM1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_4", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Doric/sample_doric_5", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Clean/Photo_63_207-181030-103332", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + "dms", + "port_entries_dms", + ), + ( + "SampleData_with_artifacts/Photo_048_392-200728-121222", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + "dms", + "port_entries_dms", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_2", + { + "file0_chev6": "control_region", + "file1_chev6": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_3", + { + "file0_chev3": "control_region3", + "file0_chod3": "signal_region3", + "event3": "ttl_region3", + }, + "region3", + "ttl_region3", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_4", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "eventTrue": "ttl_true_region1", + }, + "region1", + "ttl_true_region1", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_5", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "event0": "ttl_region1", + }, + "region1", + "ttl_region1", + ), + ], + ids=[ + "csv_generic", + "sample_doric_1", + "sample_doric_2", + "sample_doric_3", + "sample_doric_4", + "sample_doric_5", + "tdt_clean", + "tdt_with_artifacts", + "sample_npm_2", + "sample_npm_3", + "sample_npm_4", + "sample_npm_5", + ], +) +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_step4(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl): + """ + Full integration test for Step 4 (Extract timestamps and signal) using real CSV sample data, + isolated to a temporary workspace to avoid mutating shared sample data. 
+ + Pipeline executed on a temp copy: + - Step 2: create storesList.csv (derived from sample data if not present) + - Step 3: read raw data (per-storename HDF5 files) + - Step 4: extract timestamps/signal, compute z-score/dFF, time corrections, etc. + + Notes: + - matplotlib plotting in preprocess uses a GUI backend; to avoid blocking, we stub plt.show(). + - Assertions confirm creation of key HDF5 outputs expected from Step 4. + """ + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": + npm_timestamp_column_name = "ComputerTimestamp" + npm_time_unit = "milliseconds" + else: + npm_timestamp_column_name = None + npm_time_unit = None + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": + npm_split_events = False + else: + npm_split_events = True + + # Use the CSV sample session + src_base_dir = str(Path(".") / "testing_data") + src_session = os.path.join(src_base_dir, session_subdir) + if not os.path.isdir(src_session): + pytest.skip(f"Sample data not available at expected path: {src_session}") + + # Stub matplotlib.pyplot.show to avoid GUI blocking + import matplotlib.pyplot as plt # noqa: F401 + monkeypatch.setattr("matplotlib.pyplot.show", lambda *args, **kwargs: None) + + # Stage a clean copy of the session into a temporary workspace + tmp_base = tmp_path / "data_root" + tmp_base.mkdir(parents=True, exist_ok=True) + dest_name = os.path.basename(src_session) + session_copy = tmp_base / dest_name + shutil.copytree(src_session, session_copy) + + # Remove any copied artifacts in the temp session (match only this session's output dirs) + for d in glob.glob(os.path.join(session_copy, f"{dest_name}_output_*")): + assert os.path.isdir(d), f"Expected output directory for cleanup, got non-directory: {d}" + shutil.rmtree(d) + params_fp = session_copy / "GuPPyParamtersUsed.json" + if params_fp.exists(): + params_fp.unlink() + + # Step 2: create storesList.csv in the temp copy + step2(base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 3: read raw data in the temp copy + step3(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 4: extract timestamps and signal in the temp copy + step4(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Validate outputs exist in the temp copy + basename = os.path.basename(session_copy) + output_dirs = sorted(glob.glob(os.path.join(session_copy, f"{basename}_output_*"))) + assert output_dirs, f"No output directories found in {session_copy}" + out_dir = None + for d in output_dirs: + if os.path.exists(os.path.join(d, "storesList.csv")): + out_dir = d + break + assert out_dir is not None, f"No storesList.csv found in any output directory under {session_copy}" + stores_fp = os.path.join(out_dir, "storesList.csv") + assert os.path.exists(stores_fp), "Missing storesList.csv after Step 2/3/4" + + # Ensure timeCorrection_.hdf5 exists with 'timestampNew' + timecorr = os.path.join(out_dir, f"timeCorrection_{expected_region}.hdf5") + assert os.path.exists(timecorr), f"Missing {timecorr}" + with h5py.File(timecorr, "r") as f: + assert "timestampNew" in f, f"Expected 'timestampNew' dataset in {timecorr}" + + # 
If TTLs exist, check their per-region 'ts' outputs + if expected_ttl is not None: + ttl_fp = os.path.join(out_dir, f"{expected_ttl}_{expected_region}.hdf5") + assert os.path.exists(ttl_fp), f"Missing TTL-aligned file {ttl_fp}" + with h5py.File(ttl_fp, "r") as f: + assert "ts" in f, f"Expected 'ts' dataset in {ttl_fp}" diff --git a/tests/test_step5.py b/tests/test_step5.py new file mode 100644 index 0000000..5f93660 --- /dev/null +++ b/tests/test_step5.py @@ -0,0 +1,251 @@ +import os +import glob +import shutil + +import pytest +import pandas as pd +from pathlib import Path + +from guppy.testing.api import step2, step3, step4, step5 + + +@pytest.mark.parametrize( + "session_subdir, storenames_map, expected_region, expected_ttl", + [ + ( + "SampleData_csv/sample_data_csv_1", + { + "Sample_Control_Channel": "control_region", + "Sample_Signal_Channel": "signal_region", + "Sample_TTL": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_1", + { + "AIn-1 - Raw": "control_region", + "AIn-2 - Raw": "signal_region", + "DI--O-1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_2", + { + "AIn-1 - Dem (ref)": "control_region", + "AIn-1 - Dem (da)": "signal_region", + "DI/O-1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_3", + { + "CAM1_EXC1/ROI01": "control_region", + "CAM1_EXC2/ROI01": "signal_region", + "DigitalIO/CAM1": "ttl", + }, + "region", + "ttl", + ), + ( + "SampleData_Doric/sample_doric_4", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Doric/sample_doric_5", + { + "Series0001/AIN01xAOUT01-LockIn": "control_region", + "Series0001/AIN01xAOUT02-LockIn": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Clean/Photo_63_207-181030-103332", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + "dms", + "port_entries_dms", + ), + ( + "SampleData_with_artifacts/Photo_048_392-200728-121222", + { + "Dv1A": "control_dms", + "Dv2A": "signal_dms", + "PrtN": "port_entries_dms", + }, + "dms", + "port_entries_dms", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_2", + { + "file0_chev6": "control_region", + "file1_chev6": "signal_region", + }, + "region", + None, + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_3", + { + "file0_chev3": "control_region3", + "file0_chod3": "signal_region3", + "event3": "ttl_region3", + }, + "region3", + "ttl_region3", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_4", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "eventTrue": "ttl_true_region1", + }, + "region1", + "ttl_true_region1", + ), + ( + "SampleData_Neurophotometrics/sampleData_NPM_5", + { + "file0_chev1": "control_region1", + "file0_chod1": "signal_region1", + "event0": "ttl_region1", + }, + "region1", + "ttl_region1", + ), + ], + ids=[ + "csv_generic", + "sample_doric_1", + "sample_doric_2", + "sample_doric_3", + "sample_doric_4", + "sample_doric_5", + "tdt_clean", + "tdt_with_artifacts", + "sample_npm_2", + "sample_npm_3", + "sample_npm_4", + "sample_npm_5", + ], +) +@pytest.mark.filterwarnings("ignore::UserWarning") +def test_step5(tmp_path, monkeypatch, session_subdir, storenames_map, expected_region, expected_ttl): + """ + Full integration test for Step 5 (PSTH Computation) using real CSV sample data, + isolated to a temporary workspace to avoid mutating shared sample data. 
+ + Pipeline executed on a temp copy: + - Step 2: save storenames (storesList.csv) + - Step 3: read raw data (per-storename HDF5 outputs) + - Step 4: extract timestamps/signal, z-score/dFF, time corrections + - Step 5: compute PSTH and peak/AUC outputs + + Notes: + - matplotlib plotting in earlier steps may use a GUI backend; stub plt.show() to avoid blocking. + - Assertions confirm creation and basic readability of PSTH-related outputs from Step 5. + - Defaults are used for input parameters; PSTH computation defaults to z_score. + """ + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_3": + npm_timestamp_column_name = "ComputerTimestamp" + npm_time_unit = "milliseconds" + else: + npm_timestamp_column_name = None + npm_time_unit = None + if session_subdir == "SampleData_Neurophotometrics/sampleData_NPM_5": + npm_split_events = False + else: + npm_split_events = True + + # Use the sample session + src_base_dir = str(Path(".") / "testing_data") + src_session = os.path.join(src_base_dir, session_subdir) + if not os.path.isdir(src_session): + pytest.skip(f"Sample data not available at expected path: {src_session}") + + # Stub matplotlib.pyplot.show to avoid GUI blocking (used in earlier steps) + import matplotlib.pyplot as plt # noqa: F401 + monkeypatch.setattr("matplotlib.pyplot.show", lambda *args, **kwargs: None) + + # Stage a clean copy of the session into a temporary workspace + tmp_base = tmp_path / "data_root" + tmp_base.mkdir(parents=True, exist_ok=True) + dest_name = os.path.basename(src_session) + session_copy = tmp_base / dest_name + shutil.copytree(src_session, session_copy) + + # Remove any copied artifacts in the temp session (match only this session's output dirs) + for d in glob.glob(os.path.join(session_copy, f"{dest_name}_output_*")): + assert os.path.isdir(d), f"Expected output directory for cleanup, got non-directory: {d}" + shutil.rmtree(d) + params_fp = session_copy / "GuPPyParamtersUsed.json" + if params_fp.exists(): + params_fp.unlink() + + # Step 2: create storesList.csv in the temp copy with explicit naming + step2(base_dir=str(tmp_base), selected_folders=[str(session_copy)], storenames_map=storenames_map, npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 3: read raw data in the temp copy + step3(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 4: extract timestamps and signal in the temp copy + step4(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Step 5: compute PSTH in the temp copy (headless) + step5(base_dir=str(tmp_base), selected_folders=[str(session_copy)], npm_timestamp_column_name=npm_timestamp_column_name, npm_time_unit=npm_time_unit, npm_split_events=npm_split_events) + + # Locate output directory + basename = os.path.basename(session_copy) + output_dirs = sorted(glob.glob(os.path.join(session_copy, f"{basename}_output_*"))) + assert output_dirs, f"No output directories found in {session_copy}" + out_dir = None + for d in output_dirs: + if os.path.exists(os.path.join(d, "storesList.csv")): + out_dir = d + break + assert out_dir is not None, f"No storesList.csv found in any output directory under {session_copy}" + stores_fp = os.path.join(out_dir, "storesList.csv") + assert 
os.path.exists(stores_fp), "Missing storesList.csv after Steps 2-5" + + # Expected PSTH outputs (defaults compute z_score PSTH) - only for datasets with TTLs + if expected_ttl is not None: + psth_h5 = os.path.join(out_dir, f"{expected_ttl}_{expected_region}_z_score_{expected_region}.h5") + psth_baseline_uncorr_h5 = os.path.join(out_dir, f"{expected_ttl}_{expected_region}_baselineUncorrected_z_score_{expected_region}.h5") + peak_auc_h5 = os.path.join(out_dir, f"peak_AUC_{expected_ttl}_{expected_region}_z_score_{expected_region}.h5") + peak_auc_csv = os.path.join(out_dir, f"peak_AUC_{expected_ttl}_{expected_region}_z_score_{expected_region}.csv") + + # Assert file creation + assert os.path.exists(psth_h5), f"Missing PSTH HDF5: {psth_h5}" + assert os.path.exists(psth_baseline_uncorr_h5), f"Missing baseline-uncorrected PSTH HDF5: {psth_baseline_uncorr_h5}" + assert os.path.exists(peak_auc_h5), f"Missing PSTH Peak/AUC HDF5: {peak_auc_h5}" + assert os.path.exists(peak_auc_csv), f"Missing PSTH Peak/AUC CSV: {peak_auc_csv}" + + # Basic readability checks: PSTH HDF5 contains a DataFrame with expected columns + df = pd.read_hdf(psth_h5, key="df") + assert "timestamps" in df.columns, f"'timestamps' column missing in {psth_h5}" + # The DataFrame should include a 'mean' column per create_Df implementation + assert "mean" in df.columns, f"'mean' column missing in {psth_h5}" + + # Additional artifacts from transients frequency/amplitude computation (Step 5 side-effect) + freq_amp_h5 = os.path.join(out_dir, f"freqAndAmp_z_score_{expected_region}.h5") + freq_amp_csv = os.path.join(out_dir, f"freqAndAmp_z_score_{expected_region}.csv") + trans_occ_csv = os.path.join(out_dir, f"transientsOccurrences_z_score_{expected_region}.csv") + assert os.path.exists(freq_amp_h5), f"Missing freq/amp HDF5: {freq_amp_h5}" + assert os.path.exists(freq_amp_csv), f"Missing freq/amp CSV: {freq_amp_csv}" + assert os.path.exists(trans_occ_csv), f"Missing transients occurrences CSV: {trans_occ_csv}"
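
Usage note (not part of the patch): the new `guppy.testing.api` module turns the Panel/Tk workflow into plain function calls, which is what the tests above rely on. A minimal sketch of driving the first two pipeline steps headlessly is shown below; the `~/photometry/data_root` location and session name are hypothetical, and the storename mapping mirrors the `tdt_clean` parametrization from `tests/test_step2.py`.

```python
# Minimal sketch of the headless API added in src/guppy/testing/api.py.
# Paths are hypothetical; step1/step2 set GUPPY_BASE_DIR internally to bypass the Tk folder dialog.
import os

from guppy.testing.api import step1, step2

base_dir = os.path.expanduser("~/photometry/data_root")          # hypothetical data root
session = os.path.join(base_dir, "Photo_63_207-181030-103332")   # must sit directly under base_dir

# Step 1: write GuPPyParamtersUsed.json into the session folder, using the Panel defaults
step1(base_dir=base_dir, selected_folders=[session])

# Step 2: write storesList.csv without the Panel UI, from an explicit storename mapping
step2(
    base_dir=base_dir,
    selected_folders=[session],
    storenames_map={
        "Dv1A": "control_dms",
        "Dv2A": "signal_dms",
        "PrtN": "port_entries_dms",
    },
)
```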
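For reference, the headless branch added to `saveStorenames()` reduces Step 2 to a two-row CSV; the standalone sketch below reproduces that write outside the pipeline, which is what the `rows[0]`/`rows[1]` assertions in `tests/test_step2.py` check (the output path here is hypothetical).

```python
# Standalone sketch of the storesList.csv layout produced by the headless branch in saveStorenames():
# row 0 holds the raw storenames, row 1 the user-facing names, in insertion order.
import numpy as np

storenames_map = {"Dv1A": "control_dms", "Dv2A": "signal_dms", "PrtN": "port_entries_dms"}
arr = np.asarray([list(storenames_map.keys()), list(storenames_map.values())], dtype=str)
np.savetxt("storesList.csv", arr, delimiter=",", fmt="%s")  # hypothetical output location
# storesList.csv then reads:
#   Dv1A,Dv2A,PrtN
#   control_dms,signal_dms,port_entries_dms
```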