Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np
import pytest
import xarray as xr
import zarr
from obstore.store import LocalStore
from xarray.core.variable import Variable

Expand Down Expand Up @@ -72,10 +73,14 @@ def local_registry():
return ObjectStoreRegistry({"file://": LocalStore()})


@pytest.fixture()
def zarr_store_scalar(tmpdir):
import zarr
@pytest.fixture(params=["int8", "uint8", "float32"])
def zarr_array_fill_value(request):
store = zarr.storage.MemoryStore()
return zarr.create_array(store=store, shape=(), dtype=request.param)


@pytest.fixture()
def zarr_store_scalar():
store = zarr.storage.MemoryStore()
zarr_store_scalar = zarr.create_array(store=store, shape=(), dtype="int8")
zarr_store_scalar[()] = 42
Expand Down
4 changes: 4 additions & 0 deletions docs/releases.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@

### Bug fixes

- `ZarrParser` no longer uses `ZARR_DEFAULT_FILL_VALUE` lookup to infer missing `fill_value`.
([#666](https://github.com/zarr-developers/VirtualiZarr/pull/666)).
By [Raphael Hagen](https://github.com/norlandrhagen).

### Documentation

### Internal changes
Expand Down
29 changes: 7 additions & 22 deletions virtualizarr/parsers/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path # noqa
from typing import TYPE_CHECKING, Any, Hashable

import numpy as np
import zarr
from zarr.api.asynchronous import open_group as open_group_async
from zarr.core.metadata import ArrayV3Metadata
from zarr.storage import ObjectStore
Expand All @@ -20,23 +20,13 @@
from virtualizarr.registry import ObjectStoreRegistry
from virtualizarr.vendor.zarr.core.common import _concurrent_map

FillValueT = bool | str | float | int | list | None

ZARR_DEFAULT_FILL_VALUE: dict[str, FillValueT] = {
# numpy's dtype hierarchy lets us avoid checking for all the widths
# https://numpy.org/doc/stable/reference/arrays.scalars.html
np.dtype("bool").kind: False,
np.dtype("int").kind: 0,
np.dtype("float").kind: 0.0,
np.dtype("complex").kind: [0.0, 0.0],
np.dtype("datetime64").kind: 0,
}

if TYPE_CHECKING:
import zarr

ZarrArrayType = zarr.AsyncArray | zarr.Array


async def get_chunk_mapping_prefix(zarr_array: zarr.AsyncArray, path: str) -> dict:
async def get_chunk_mapping_prefix(zarr_array: ZarrArrayType, path: str) -> dict:
"""Create a dictionary to pass into ChunkManifest __init__"""

# TODO: When we want to support reading V2, we should parse the /c/ and "/" between chunks
Expand Down Expand Up @@ -70,26 +60,21 @@ async def get_chunk_mapping_prefix(zarr_array: zarr.AsyncArray, path: str) -> di
}


async def build_chunk_manifest(zarr_array: zarr.AsyncArray, path: str) -> ChunkManifest:
async def build_chunk_manifest(zarr_array: ZarrArrayType, path: str) -> ChunkManifest:
"""Build a ChunkManifest from a dictionary"""
chunk_map = await get_chunk_mapping_prefix(zarr_array=zarr_array, path=path)
return ChunkManifest(chunk_map)


def get_metadata(zarr_array: zarr.AsyncArray[Any]) -> ArrayV3Metadata:
fill_value = zarr_array.metadata.fill_value
if fill_value is not None:
fill_value = ZARR_DEFAULT_FILL_VALUE[zarr_array.metadata.fill_value.dtype.kind]

def get_metadata(zarr_array: ZarrArrayType) -> ArrayV3Metadata:
zarr_format = zarr_array.metadata.zarr_format

if zarr_format == 2:
# TODO: Once we want to support V2, we will have to deconstruct the
# zarr_array codecs etc. and reconstruct them with create_v3_array_metadata
raise NotImplementedError("Reading Zarr V2 currently not supported.")

elif zarr_format == 3:
return zarr_array.metadata
return zarr_array.metadata # type: ignore[return-value]

else:
raise NotImplementedError("Zarr format is not recognized as v2 or v3.")
Expand Down
13 changes: 11 additions & 2 deletions virtualizarr/tests/test_parsers/test_zarr.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import numpy as np
import pytest
import zarr
from obstore.store import LocalStore

from virtualizarr import open_virtual_dataset
from virtualizarr.manifests import ManifestArray
from virtualizarr.parsers import ZarrParser
from virtualizarr.parsers.zarr import get_chunk_mapping_prefix
from virtualizarr.parsers.zarr import get_chunk_mapping_prefix, get_metadata
from virtualizarr.registry import ObjectStoreRegistry

ZarrArrayType = zarr.AsyncArray | zarr.Array


@pytest.mark.parametrize(
"zarr_store",
Expand Down Expand Up @@ -105,7 +108,7 @@ def test_virtual_dataset_zarr_attrs(self, zarr_store):
assert expected == actual


def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar):
def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar: ZarrArrayType):
# Use a scalar zarr store with a /c/ representing the scalar:
# https://zarr-specs.readthedocs.io/en/latest/v3/chunk-key-encodings/default/index.html#description

Expand All @@ -118,3 +121,9 @@ def test_scalar_get_chunk_mapping_prefix(zarr_store_scalar):
)
assert chunk_map["c"]["offset"] == 0
assert chunk_map["c"]["length"] == 10


def test_get_metadata(zarr_array_fill_value: ZarrArrayType):
# Check that `get_metadata` returns metadata whose fill_value matches the array's own fill_value
zarr_array_metadata = get_metadata(zarr_array=zarr_array_fill_value)
assert zarr_array_metadata.fill_value == zarr_array_fill_value.metadata.fill_value