Merge pull request #90 from scipp/mcstas-to-nexus

nvaytet · web-flow · commit 3f1b8bd11f60 · 2025-06-26T11:14:26.000+02:00
Mcstas to nexus
diff --git a/tools/mcstas_to_nexus.ipynb b/tools/mcstas_to_nexus.ipynb
@@ -0,0 +1,272 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "0",
+   "metadata": {},
+   "source": [
+    "# McStas to NeXus for ODIN\n",
+    "\n",
+    "This notebook converts the output of a McStas simulation (an `.h5` file) into a NeXus file. An existing NeXus file for the Odin instrument (written by CODA) is used as a template that provides the geometry information.\n",
+    "\n",
+    "It replaces the events of the `timepix3` detector and of the `beam_monitor_3` monitor in the copied template with the simulated events."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import scipp as sc\n",
+    "import scippnexus as sx\n",
+    "import h5py as h5\n",
+    "import shutil"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_mcstas_simulation_data(\n",
+    "    file_path,\n",
+    "    nevents,\n",
+    "    data_path=\"entry1/data/transmission_event_signal_dat_list_p_t_x_y_z_vx_vy_vz/events\",\n",
+    "):\n",
+    "    \"\"\"\n",
+    "    Load McStas events and resample them into events of unit weight.\n",
+    "\n",
+    "    Rows of the event table are drawn at random, each row being picked with a\n",
+    "    probability proportional to its McStas weight.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    file_path:\n",
+    "        McStas output file to read.\n",
+    "    nevents:\n",
+    "        Number of events to draw (converted to an integer).\n",
+    "    data_path:\n",
+    "        Location of the event table inside the file.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    :\n",
+    "        One count per drawn event, with the coordinates ``time_of_arrival``\n",
+    "        (in microseconds), ``x`` and ``y`` (both in meters).\n",
+    "    \"\"\"\n",
+    "    n_samples = int(nevents)\n",
+    "    with sx.File(file_path, \"r\") as mcstas_file:\n",
+    "        # The columns of the table follow the order spelled out by the name\n",
+    "        # p_t_x_y_z_vx_vy_vz: probability, time of arrival, position (x, y, z)\n",
+    "        # and velocity (vx, vy, vz). Column 0 is therefore the probability.\n",
+    "        table = mcstas_file[data_path][()].rename_dims({\"dim_0\": \"event\"})\n",
+    "\n",
+    "    def column(index, unit):\n",
+    "        # Units are hardcoded from the data.\n",
+    "        extracted = table[\"dim_1\", index].copy()\n",
+    "        extracted.unit = unit\n",
+    "        return extracted\n",
+    "\n",
+    "    weights = column(0, \"dimensionless\")\n",
+    "    arrival_time = column(1, \"s\")\n",
+    "    x_pos = column(2, \"m\")\n",
+    "    y_pos = column(3, \"m\")\n",
+    "\n",
+    "    event_dims = weights.dims\n",
+    "    picked = np.random.choice(\n",
+    "        np.arange(table.sizes[\"event\"]),\n",
+    "        n_samples,\n",
+    "        p=weights.values / weights.values.sum(),\n",
+    "    )\n",
+    "\n",
+    "    counts = sc.array(dims=event_dims, values=np.ones(n_samples), unit=\"counts\")\n",
+    "    coords = {\n",
+    "        \"time_of_arrival\": sc.array(\n",
+    "            dims=event_dims,\n",
+    "            values=arrival_time.to(unit=\"us\").values[picked],\n",
+    "            unit=\"us\",\n",
+    "        ),\n",
+    "        \"x\": sc.array(dims=event_dims, values=x_pos.values[picked], unit=x_pos.unit),\n",
+    "        \"y\": sc.array(dims=event_dims, values=y_pos.values[picked], unit=y_pos.unit),\n",
+    "    }\n",
+    "    return sc.DataArray(data=counts, coords=coords)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def replace_dataset(entry, name, values):\n",
+    "    \"\"\"\n",
+    "    Overwrite the dataset ``name`` inside ``entry`` with ``values``.\n",
+    "\n",
+    "    The existing dataset is deleted and re-created from ``values``; the\n",
+    "    attributes it carried are copied over to the new dataset.\n",
+    "    \"\"\"\n",
+    "    saved_attrs = dict(entry[name].attrs)\n",
+    "    del entry[name]\n",
+    "    entry.create_dataset(name, data=values).attrs.update(saved_attrs)\n",
+    "\n",
+    "\n",
+    "def _write_events(group, event_id, event_time_offset, event_index):\n",
+    "    \"\"\"\n",
+    "    Replace the event datasets stored in the HDF5 group ``group``.\n",
+    "\n",
+    "    ``event_time_offset`` and ``event_time_zero`` are converted to the units\n",
+    "    declared by the datasets they replace.\n",
+    "    \"\"\"\n",
+    "    replace_dataset(group, name=\"event_id\", values=event_id)\n",
+    "    replace_dataset(\n",
+    "        group,\n",
+    "        name=\"event_time_offset\",\n",
+    "        values=event_time_offset.to(\n",
+    "            unit=group[\"event_time_offset\"].attrs[\"units\"], copy=False\n",
+    "        ).values,\n",
+    "    )\n",
+    "    replace_dataset(group, name=\"event_index\", values=event_index.values)\n",
+    "    replace_dataset(\n",
+    "        group,\n",
+    "        name=\"event_time_zero\",\n",
+    "        values=event_index.coords[\"event_time_zero\"]\n",
+    "        .to(unit=group[\"event_time_zero\"].attrs[\"units\"], copy=False)\n",
+    "        .values.astype(int),\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "def mcstas_to_nexus(\n",
+    "    mcstas_data_file: str,\n",
+    "    template_nexus_file: str,\n",
+    "    outfile: str,\n",
+    "    nevents: float = 1e6,\n",
+    "    detector_entry_path: str | None = \"entry/instrument/event_mode_detectors/timepix3\",\n",
+    "    monitor_entry_path: str | None = \"entry/instrument/beam_monitor_3\",\n",
+    "):\n",
+    "    \"\"\"\n",
+    "    Store the events from a McStas Odin simulation in a NeXus CODA file.\n",
+    "\n",
+    "    The template file is copied to ``outfile`` and the event datasets of the\n",
+    "    detector and/or the monitor in the copy are replaced with the sampled events.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    mcstas_data_file:\n",
+    "        Data file containing simulated McStas events.\n",
+    "    template_nexus_file:\n",
+    "        NeXus file containing geometry and instrument info, used as a template.\n",
+    "    outfile:\n",
+    "        Output file to be written.\n",
+    "    nevents:\n",
+    "        Number of events to have in the output file\n",
+    "        (events are sampled from the probabilities of the mcstas events).\n",
+    "    detector_entry_path:\n",
+    "        Location of the event detector in the nexus tree.\n",
+    "        If ``None``, no detector events are written.\n",
+    "    monitor_entry_path:\n",
+    "        Location of the event monitor in the nexus tree.\n",
+    "        If ``None``, no monitor events are written.\n",
+    "    \"\"\"\n",
+    "    if detector_entry_path is not None:\n",
+    "        # Find shape of detector panel\n",
+    "        with h5.File(template_nexus_file, \"r\") as f:\n",
+    "            shape = f[f\"{detector_entry_path}/x_pixel_offset\"].shape\n",
+    "            det_numbers = f[f\"{detector_entry_path}/detector_number\"][()]\n",
+    "\n",
+    "    da = load_mcstas_simulation_data(mcstas_data_file, nevents=int(nevents))\n",
+    "\n",
+    "    if detector_entry_path is not None:\n",
+    "        binned = da.bin(y=shape[0], x=shape[1]).rename_dims(y=\"dim_0\", x=\"dim_1\")\n",
+    "        toa = binned.bins.coords[\"time_of_arrival\"].bins.concat().value\n",
+    "\n",
+    "        # IMPORTANT! we need to sort the arrays below according to toa,\n",
+    "        # so that the event_index does not get messed up!\n",
+    "        event_id = sc.sort(\n",
+    "            (\n",
+    "                sc.bins_like(binned, sc.array(dims=binned.dims, values=det_numbers))\n",
+    "                .bins.concat()\n",
+    "                .value\n",
+    "            ),\n",
+    "            key=toa,\n",
+    "        ).values\n",
+    "    else:\n",
+    "        # Without a detector there is no panel to bin the events onto:\n",
+    "        # the arrival times are used as sampled.\n",
+    "        toa = da.coords[\"time_of_arrival\"]\n",
+    "        event_id = None\n",
+    "\n",
+    "    # Split the arrival times into pulses of period 1 / 14 Hz, counted from `start`:\n",
+    "    # event_time_zero is the start of the pulse an event falls in and\n",
+    "    # event_time_offset is the time elapsed since the start of that pulse.\n",
+    "    unit = \"ns\"\n",
+    "    period = (1.0 / sc.scalar(14.0, unit=\"Hz\")).to(unit=unit)\n",
+    "    start = sc.datetime(\"2024-01-01T12:00:00.000000000\")\n",
+    "\n",
+    "    event_time_zero = sc.sort(\n",
+    "        (period * (toa.to(unit=\"ns\", copy=False) // period)).to(dtype=int) + start,\n",
+    "        key=toa,\n",
+    "    )\n",
+    "\n",
+    "    event_time_offset = sc.sort(toa % period.to(unit=toa.unit), key=toa)\n",
+    "\n",
+    "    event_index = sc.DataArray(\n",
+    "        data=sc.ones_like(event_time_offset),\n",
+    "        coords={\"event_time_zero\": event_time_zero},\n",
+    "    ).group(\"event_time_zero\")\n",
+    "\n",
+    "    # Index of the first event of every pulse: the cumulative number of events,\n",
+    "    # shifted by one entry so that the first pulse starts at 0.\n",
+    "    event_index = sc.cumsum(event_index.bins.size())\n",
+    "    event_index.values = np.concatenate([[0], event_index.values[:-1]])\n",
+    "\n",
+    "    # Now edit the template file\n",
+    "    print(f\"Writing {outfile} file\")\n",
+    "    shutil.copyfile(template_nexus_file, outfile)\n",
+    "    # The context manager closes the output file even if one of the writes fails.\n",
+    "    with h5.File(outfile, \"r+\") as f:\n",
+    "        # Detector data\n",
+    "        if detector_entry_path is not None:\n",
+    "            # remove translation offset\n",
+    "            translation = f[f\"{detector_entry_path}/transformations/translation\"]\n",
+    "            translation.attrs[\"offset\"] = np.array([0, 0, 0], dtype=\"float32\")\n",
+    "\n",
+    "            _write_events(\n",
+    "                f[f\"{detector_entry_path}/timepix3_events\"],\n",
+    "                event_id,\n",
+    "                event_time_offset,\n",
+    "                event_index,\n",
+    "            )\n",
+    "\n",
+    "        # Monitor data\n",
+    "        if monitor_entry_path is not None:\n",
+    "            monitor_data = f[f\"{monitor_entry_path}/monitor_3_events\"]\n",
+    "            if event_id is None:\n",
+    "                # No detector numbers to mirror: keep the dtype of the template.\n",
+    "                monitor_id = np.zeros(toa.shape, dtype=monitor_data[\"event_id\"].dtype)\n",
+    "            else:\n",
+    "                monitor_id = np.zeros_like(event_id)\n",
+    "            _write_events(monitor_data, monitor_id, event_time_offset, event_index)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Convert every simulation file, using the same CODA file as template each time.\n",
+    "template = \"977695_00072982.hdf\"\n",
+    "files = [\n",
+    "    \"small_mcstas_sample_images.h5\",\n",
+    "    \"small_mcstas_ob_images.h5\",\n",
+    "    \"iron_simulation_sample.h5\",\n",
+    "    \"iron_simulation_ob.h5\",\n",
+    "]\n",
+    "\n",
+    "for mcstas_file in files:\n",
+    "    mcstas_to_nexus(\n",
+    "        mcstas_data_file=mcstas_file,\n",
+    "        template_nexus_file=template,\n",
+    "        outfile=mcstas_file.replace(\".h5\", \".nxs\"),\n",
+    "        nevents=1e6,\n",
+    "    )"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}