From 1cfcd0df5292fab8a2df1989c576f7d4eb8af708 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 11 Jun 2024 20:53:01 +0200 Subject: [PATCH 1/4] adapt the neighbour visualization to the new state of the module --- experimentation/visualize-neighbours.ipynb | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/experimentation/visualize-neighbours.ipynb b/experimentation/visualize-neighbours.ipynb index b60816b..17bac08 100644 --- a/experimentation/visualize-neighbours.ipynb +++ b/experimentation/visualize-neighbours.ipynb @@ -13,7 +13,7 @@ "import pandas as pd\n", "import shapely\n", "\n", - "import healpix_convolution.neighbours as nb" + "import healpix_convolution" ] }, { @@ -142,7 +142,7 @@ "source": [ "resolution = 1\n", "cell_ids = np.array([25], dtype=\"int64\")\n", - "neighbours = nb.neighbours(\n", + "neighbours = healpix_convolution.neighbours(\n", " cell_ids, resolution=resolution, indexing_scheme=\"nested\", ring=2\n", ")\n", "\n", @@ -207,7 +207,9 @@ "resolution = 2\n", "cell_ids = np.arange(4**resolution, 2 * 4**resolution)\n", "\n", - "actual = nb.neighbours(cell_ids, resolution=resolution, indexing_scheme=\"nested\")\n", + "actual = healpix_convolution.neighbours(\n", + " cell_ids, resolution=resolution, indexing_scheme=\"nested\"\n", + ")\n", "expected_ = hp.get_all_neighbours(2**resolution, cell_ids, nest=True).T\n", "expected = np.concatenate([cell_ids[:, None], expected_], axis=1)" ] From 54abaf43d981c7b7db37af91b8d2271da653fad2 Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Tue, 11 Jun 2024 20:53:26 +0200 Subject: [PATCH 2/4] add a notebook that demonstrates the convolution --- experimentation/convolution.ipynb | 377 ++++++++++++++++++++++++++++++ 1 file changed, 377 insertions(+) create mode 100644 experimentation/convolution.ipynb diff --git a/experimentation/convolution.ipynb b/experimentation/convolution.ipynb new file mode 100644 index 0000000..f602aba --- /dev/null +++ b/experimentation/convolution.ipynb @@ -0,0 +1,377 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "import distributed\n", + "\n", + "client = distributed.Client()\n", + "client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import xdggs" + ] + }, + { + "cell_type": "raw", + "id": "2", + "metadata": {}, + "source": [ + "def clean_healpix_params(ds):\n", + " out = ds.copy()\n", + " params = out[\"cell_ids\"].attrs\n", + " params.pop(\"nside\")\n", + " params.pop(\"grid_type\")\n", + " params.pop(\"lat\")\n", + " params.pop(\"lon\")\n", + " params.pop(\"rot_lat\")\n", + " params.pop(\"rot_lon\")\n", + "\n", + " return out" + ] + }, + { + "cell_type": "raw", + "id": "3", + "metadata": {}, + "source": [ + "def cast_cell_ids(ds):\n", + " return ds.assign_coords(cell_ids=lambda ds: ds[\"cell_ids\"].astype(\"int64\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "url = \"/home/jmagin/work/data/destine/average_surface_temperature.zarr\"\n", + "ds = (\n", + " xr.open_dataset(url, engine=\"zarr\", chunks={})\n", + " .isel(oceanModelLayer=0)\n", + " .pipe(xdggs.decode)\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "raw", + "id": "5", + "metadata": {}, + "source": [ + "url = \"/home/jmagin/work/data/air_temperature_healpix.nc\"\n", + "ds = (\n", + " xr.open_dataset(url, engine=\"netcdf4\", chunks={\"time\": 100})\n", + " .pipe(clean_healpix_params)\n", + " .pipe(cast_cell_ids)\n", + " #.assign_coords(cell_ids=lambda ds: ds[\"cell_ids\"].compute())\n", + " #.pipe(xdggs.decode)\n", + ")\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import xdggs # noqa: F401\n", + "\n", + "from healpix_convolution.kernels import gaussian\n", + "\n", + "\n", + "def gaussian_kernel(\n", + " cell_ids, sigma: float, truncate: float = 4.0, kernel_size: int | None = None\n", + "):\n", + " \"\"\"Create a symmetric gaussian kernel for the given cell ids\n", + "\n", + " Parameters\n", + " ----------\n", + " cell_ids : xarray.DataArray\n", + " The cell ids. Must be valid according to xdggs.\n", + " sigma : float\n", + " The standard deviation of the gaussian function in radians.\n", + " truncate : float, default: 4.0\n", + " Truncate the kernel after this many multiples of sigma.\n", + " kernel_size : int, optional\n", + " If given, will be used instead of ``truncate`` to determine the size of the kernel.\n", + "\n", + " Returns\n", + " -------\n", + " kernel : xarray.DataArray\n", + " The kernel as a sparse matrix.\n", + " \"\"\"\n", + " dims = list(cell_ids.dims)\n", + "\n", + " grid = xdggs.healpix.HealpixInfo.from_dict(cell_ids.attrs)\n", + "\n", + " matrix = xr.apply_ufunc(\n", + " gaussian.gaussian_kernel,\n", + " cell_ids,\n", + " kwargs={\n", + " \"resolution\": grid.resolution,\n", + " \"indexing_scheme\": grid.indexing_scheme,\n", + " \"sigma\": sigma,\n", + " \"truncate\": truncate,\n", + " \"kernel_size\": kernel_size,\n", + " },\n", + " input_core_dims=[dims],\n", + " output_core_dims=[[\"output_cells\", \"input_cells\"]],\n", + " dask=\"allowed\",\n", + " keep_attrs=\"drop\",\n", + " )\n", + "\n", + " if kernel_size is not None:\n", + " size_param = {\"kernel_size\": kernel_size}\n", + " else:\n", + " size_param = {\"truncate\": truncate}\n", + "\n", + " return matrix.assign_attrs(\n", + " {\"kernel_type\": \"gaussian\", \"method\": \"continuous\", \"sigma\": sigma} | size_param\n", + " ).assign_coords(\n", + " input_cell_ids=cell_ids.swap_dims({\"cells\": \"input_cells\"}).variable,\n", + " output_cell_ids=cell_ids.swap_dims({\"cells\": \"output_cells\"}).variable,\n", + " )" + ] + }, + { + "cell_type": "raw", + "id": "7", + "metadata": {}, + "source": [ + "from healpix_convolution.kernels.xarray.gaussian import gaussian_kernel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "kernel = gaussian_kernel(ds[\"cell_ids\"], sigma=0.0015, truncate=3)\n", + "kernel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "def convolve(ds, kernel):\n", + " if ds.chunksizes:\n", + " kernel = kernel.chunk()\n", + "\n", + " def _convolve(arr, weights):\n", + " src_dims = [\"input_cells\"]\n", + "\n", + " if not set(src_dims).issubset(arr.dims):\n", + " return arr\n", + "\n", + " return xr.dot(\n", + " # drop all input coords, as those would most likely be broadcast\n", + " arr.variable,\n", + " weights,\n", + " # This dimension will be \"contracted\"\n", + " # or summed over after multiplying by the weights\n", + " dims=src_dims,\n", + " )\n", + "\n", + " input_dims = [\"cells\"]\n", + "\n", + " unchanged_coords = {\n", + " k: v for k, v in ds.coords.items() if not set(input_dims).intersection(v.dims)\n", + " }\n", + "\n", + " return (\n", + " ds.rename_dims({\"cells\": \"input_cells\"})\n", + " .map(_convolve, weights=kernel)\n", + " .rename_dims({\"output_cells\": \"cells\"})\n", + " .rename_vars({\"output_cell_ids\": \"cell_ids\"})\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mask = ds.notnull()\n", + "convolved = convolve(ds.fillna(0).compute(), kernel).where(mask)\n", + "convolved" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "computed = convolved.compute()\n", + "computed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "convolved_ = computed.pipe(xdggs.decode).dggs.assign_latlon_coords()\n", + "convolved_" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "## visualization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import cartopy.crs as ccrs\n", + "import cartopy.feature" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "import healpy as hp\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "c = ds.compute()\n", + "mask = c.avg_thetao.notnull()\n", + "smoothed = xr.DataArray(\n", + " hp.smoothing(c.avg_thetao.fillna(0).data, sigma=0.0015, nest=True),\n", + " dims=\"cells\",\n", + " coords=c.coords,\n", + ").where(mask)\n", + "smoothed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "17", + "metadata": {}, + "outputs": [], + "source": [ + "smoothed.count()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "convolved_.avg_thetao - smoothed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "from healpix_convolution.plotting import xr_plot_healpix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "fig, axes = plt.subplots(\n", + " nrows=3, ncols=1, figsize=(16, 16), subplot_kw={\"projection\": ccrs.Mollweide()}\n", + ")\n", + "mappable1 = xr_plot_healpix(\n", + " smoothed, ax=axes[0], cmap=\"plasma\", vmin=230, title=\"smoothing\"\n", + ")\n", + "# fig.colorbar(mappable1, orientation=\"horizontal\")\n", + "mappable2 = xr_plot_healpix(\n", + " convolved_.avg_thetao,\n", + " ax=axes[1],\n", + " cmap=\"plasma\",\n", + " vmin=230,\n", + " title=\"healpix-convolution\",\n", + ")\n", + "# fig.colorbar(mappable2, orientation=\"horizontal\")\n", + "mappable3 = xr_plot_healpix(\n", + " convolved_.avg_thetao - smoothed,\n", + " ax=axes[2],\n", + " cmap=\"RdBu_r\",\n", + " title=\"diff\",\n", + " vmin=-60,\n", + " vmax=60,\n", + ")\n", + "fig.colorbar(mappable3)\n", + "fig.savefig(\"comparison_smoothing.png\", format=\"png\", bbox_inches=\"tight\", dpi=300)" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 96af1d99276a20412efed6afea36c4ca38ef528b Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Wed, 12 Jun 2024 15:31:22 +0200 Subject: [PATCH 3/4] experimentation notebook for the usage with pure `numpy` --- experimentation/convolution-numpy.ipynb | 164 ++++++++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 experimentation/convolution-numpy.ipynb diff --git a/experimentation/convolution-numpy.ipynb b/experimentation/convolution-numpy.ipynb new file mode 100644 index 0000000..cac0934 --- /dev/null +++ b/experimentation/convolution-numpy.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import healpix_convolution as hc\n", + "from healpix_convolution.convolution import convolve\n", + "from healpix_convolution.plotting import plot_healpix" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## load data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "\n", + "url = \"/home/jmagin/work/data/destine/average_surface_temperature.zarr\"\n", + "ds = xr.open_dataset(url, engine=\"zarr\", chunks={}).isel(oceanModelLayer=0)\n", + "ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "computed = ds.compute()\n", + "data = computed[\"avg_thetao\"].data\n", + "cell_ids = computed[\"cell_ids\"].data\n", + "resolution = computed[\"cell_ids\"].attrs[\"resolution\"]\n", + "indexing_scheme = computed[\"cell_ids\"].attrs[\"indexing_scheme\"]" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## compute the kernel" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "kernel = hc.kernels.gaussian_kernel(\n", + " cell_ids,\n", + " resolution=resolution,\n", + " indexing_scheme=indexing_scheme,\n", + " sigma=0.001,\n", + " truncate=3,\n", + ")\n", + "kernel" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## convolve the data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "# `axis` does not work, the spatial dimension needs to be last\n", + "# use `numpy.moveaxis` if it is not already the last dimension\n", + "result = convolve(data, kernel)\n", + "result" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## plot the result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import cartopy.crs as ccrs\n", + "\n", + "fig, axes = plt.subplots(\n", + " ncols=1, nrows=2, figsize=(16, 14), subplot_kw={\"projection\": ccrs.Mollweide()}\n", + ")\n", + "plot_healpix(\n", + " data,\n", + " cell_ids,\n", + " ax=axes[0],\n", + " resolution=resolution,\n", + " cmap=\"plasma\",\n", + " title=\"original SST\",\n", + ")\n", + "plot_healpix(\n", + " result,\n", + " cell_ids,\n", + " ax=axes[1],\n", + " resolution=resolution,\n", + " cmap=\"plasma\",\n", + " title=\"result of the convolution\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 885dd02f00658f0b7c694855c99ad202581d36ef Mon Sep 17 00:00:00 2001 From: Justus Magin Date: Fri, 14 Jun 2024 12:06:31 +0200 Subject: [PATCH 4/4] choose a different resolution / cell id to demonstrate neighbours --- experimentation/visualize-neighbours.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/experimentation/visualize-neighbours.ipynb b/experimentation/visualize-neighbours.ipynb index 17bac08..90c8d2f 100644 --- a/experimentation/visualize-neighbours.ipynb +++ b/experimentation/visualize-neighbours.ipynb @@ -140,8 +140,8 @@ "metadata": {}, "outputs": [], "source": [ - "resolution = 1\n", - "cell_ids = np.array([25], dtype=\"int64\")\n", + "resolution = 2\n", + "cell_ids = np.array([16], dtype=\"int64\")\n", "neighbours = healpix_convolution.neighbours(\n", " cell_ids, resolution=resolution, indexing_scheme=\"nested\", ring=2\n", ")\n",