def info(filename, max_rows, max_cols, show_values, hide_blocks):
    """
    Print a rendering of the ASDF file at ``filename`` to stdout.

    This is the implementation behind the ``info`` command: it forwards
    the parsed command-line options to `asdf.info`.

    Parameters
    ----------
    filename : str
        Path of the ASDF file to describe.
    max_rows : int or None
        Forwarded to `asdf.info` as ``max_rows``.
    max_cols : int or None
        Forwarded to `asdf.info` as ``max_cols``.
    show_values : bool
        Forwarded to `asdf.info` as ``show_values``.
    hide_blocks : bool
        Value of the ``--hide-blocks`` flag. `asdf.info` takes the opposite
        option (``show_blocks``), so the flag is negated before forwarding.
    """
    display_options = {
        "max_rows": max_rows,
        "max_cols": max_cols,
        "show_values": show_values,
        # the CLI exposes an opt-out flag while the API exposes an opt-in one
        "show_blocks": not hide_blocks,
    }
    asdf.info(filename, **display_options)
@pytest.fixture()
def asdf_file(tmp_path):
    """
    Yield an open ASDF file written with three blocks.

    The tree holds two arrays (the second one compressed), a view of each
    array and a stream, and is written with block padding enabled.
    """
    first = np.arange(42)
    second = np.arange(720, dtype="f8")
    tree = {
        "array_0": first,
        "array_1": second,
        # views share the blocks of the arrays above
        "view_0": first,
        "view_1": second[:42],
        "stream": asdf.Stream([1], "f4"),
    }
    path = tmp_path / "test.asdf"
    writer = asdf.AsdfFile(tree)
    writer.set_array_compression(second, "bzp2")
    writer.write_to(path, pad_blocks=0.1)
    with asdf.open(path) as opened:
        yield opened


def test_count_blocks(asdf_file):
    """The viewer reports one entry per block in the file."""
    n_blocks = len(asdf_file.blocks)
    assert n_blocks == 3


def test_flags(asdf_file):
    """Only the last (streamed) block has a non-zero flags field."""
    for index, expected in enumerate((0, 0, 1)):
        assert asdf_file.blocks[index].header["flags"] == expected


def test_compression(asdf_file):
    """Only the second block carries a compression label."""
    uncompressed = b"\x00\x00\x00\x00"
    for index, expected in enumerate((uncompressed, b"bzp2", uncompressed)):
        assert asdf_file.blocks[index].header["compression"] == expected


def test_header_read_only(asdf_file):
    """Assigning into a block header is rejected."""
    header = asdf_file.blocks[0].header
    with pytest.raises(TypeError, match="does not support item assignment"):
        header["flags"] = 42


@pytest.mark.parametrize("attr", ("offset", "data_offset", "loaded"))
def test_attr_read_only(asdf_file, attr):
    """The view attributes cannot be assigned."""
    view = asdf_file.blocks[0]
    # message varies by python version
    with pytest.raises(AttributeError, match="(can't set attribute|object has no setter)"):
        setattr(view, attr, 42)


def test_offset(asdf_file):
    """Consecutive blocks are separated by the allocated size plus the magic."""
    first, second = asdf_file.blocks[0], asdf_file.blocks[1]
    # compare a relative offset so the test does not depend on the tree size
    gap = second.offset - first.data_offset
    assert first.header["allocated_size"] + len(BLOCK_MAGIC) == gap


def test_loaded(tmp_path):
    """Blocks report ``loaded`` only after their data has been accessed."""
    # the asdf_file fixture is unsuitable here: its Stream
    # causes all blocks to be loaded
    path = tmp_path / "test.asdf"
    asdf.dump({"arrays": [np.zeros(3) for _ in range(3)]}, path)

    with asdf.open(path) as af:
        for index in range(3):
            assert not af.blocks[index].loaded

        # summing every array triggers loading of all blocks
        assert np.sum([a.sum() for a in af["arrays"]]) == 0

        for index in range(3):
            assert af.blocks[index].loaded


def test_info(asdf_file, capsys):
    """The printed block summary matches the private block headers."""
    asdf_file.blocks.info()
    lines = capsys.readouterr().out.splitlines()
    # use private API to confirm public
    private_blocks = asdf_file._blocks.blocks
    h = private_blocks[0].header
    assert f"Block 0: {h['allocated_size']} bytes, {h['used_size']} used" in lines[0]
    h = private_blocks[1].header
    assert f"Block 1: {h['allocated_size']} bytes, {h['used_size']} used, bzp2 compression" in lines[1]
    assert "Block 2: Stream" in lines[2]
class BlockView:
    """
    A read-only view of an ASDF block.

    Parameters
    ----------
    read_block : object
        The block object being wrapped. This view reads its ``header``
        (a mapping), ``offset``, ``data_offset``, ``cached_data`` and
        ``_cached_data`` attributes.
    """

    def __init__(self, read_block):
        self._read_block = read_block

    @property
    def header(self):
        """
        MappingProxy: A read-only mapping of ASDF block header contents.
        """
        # MappingProxyType rejects item assignment, which keeps the view read-only
        return MappingProxyType(self._read_block.header)

    @property
    def offset(self):
        """
        int: The offset (in bytes) of the ASDF block from the start of the file.
        """
        return self._read_block.offset

    @property
    def data_offset(self):
        """
        int: The offset (in bytes) of the ASDF block data from the start of the file.
        """
        return self._read_block.data_offset

    @property
    def loaded(self):
        """
        bool: True if the ASDF block data has been loaded (and cached).
        """
        # inspect the private cache slot directly so that checking the
        # state does not go through ``cached_data``
        return self._read_block._cached_data is not None

    def load(self, out=None):
        """
        Return the data of this ASDF block.

        Parameters
        ----------
        out : None, optional
            Reserved for reading into an existing array. Only ``None``
            is currently accepted.

        Returns
        -------
        object
            The ``cached_data`` of the wrapped block.

        Raises
        ------
        NotImplementedError
            If ``out`` is not ``None``.
        """
        if out is not None:
            raise NotImplementedError("Reading into an array is not yet supported")
        return self._read_block.cached_data

    def _info(self):
        """
        Return a one-line description of this block.

        Streamed blocks are described as ``"Stream"``. Other blocks list
        their allocated size, the used size (only when it differs from the
        allocated size) and the compression label (only when one is set).
        """
        header = self.header
        if header["flags"] & BLOCK_FLAG_STREAMED:
            return "Stream"
        parts = [f"{header['allocated_size']} bytes"]
        if header["allocated_size"] != header["used_size"]:
            parts.append(f"{header['used_size']} used")
        # four NUL bytes in the compression field mean "no compression"
        if header["compression"] != b"\0\0\0\0":
            parts.append(f"{header['compression'].decode('ascii')} compression")
        return ", ".join(parts)


class BlockViewer(Sequence):
    """
    A read-only sequence of `BlockView` objects.

    Parameters
    ----------
    manager : object
        Object whose ``blocks`` attribute is the sequence of blocks to expose.
    """

    def __init__(self, manager):
        self._manager = manager

    def __len__(self):
        return len(self._manager.blocks)

    def __getitem__(self, index):
        """
        Return a `BlockView` for an integer index, or a list of
        `BlockView` objects for a slice.
        """
        blocks = self._manager.blocks
        if isinstance(index, slice):
            # wrap every selected block individually; wrapping the sliced
            # container itself would yield a view without header/offset
            return [BlockView(block) for block in blocks[index]]
        return BlockView(blocks[index])

    def _info(self):
        """
        Return one description line per block (an empty list when there
        are no blocks).
        """
        n = len(self)
        if not n:
            return []

        # conditionally use tty bold formatting
        use_bold = hasattr(sys.stdout, "isatty") and sys.stdout.isatty()

        # right-justify indices to the width of the largest index (n - 1)
        # so that the labels line up without padding that is never needed
        width = len(str(n - 1))
        lines = []
        for i, block in enumerate(self):
            prefix = f"█ Block {i:>{width}}"
            if use_bold:
                prefix = f"\x1b[1m{prefix}\x1b[0m"
            lines.append(f"{prefix}: {block._info()}")
        return lines

    def info(self):
        """
        Print a rendering of these blocks to stdout.
        """
        print("\n".join(self._info()))