From 1a38f0b4a2b27b0950109776bdd17208151f08e4 Mon Sep 17 00:00:00 2001 From: Thomas Robitaille Date: Tue, 8 Apr 2025 14:32:36 +0100 Subject: [PATCH] Save output as artifacts --- .github/workflows/main.yml | 1 + conftest.py | 7 ++ jupyter_output_monitor/_monitor.py | 80 +++++++++++++------- jupyter_output_monitor/_utils.py | 27 ++++++- jupyter_output_monitor/conftest.py | 16 ++++ jupyter_output_monitor/tests/test_monitor.py | 34 +++++++-- pyproject.toml | 1 + tox.ini | 2 +- 8 files changed, 133 insertions(+), 35 deletions(-) create mode 100644 conftest.py create mode 100644 jupyter_output_monitor/conftest.py diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 38971cc..34f07c9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,6 +9,7 @@ jobs: test: uses: OpenAstronomy/github-actions-workflows/.github/workflows/tox.yml@v1 with: + artifact-path: output-* envs: | - linux: py310-test - linux: py311-test diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..d66d52f --- /dev/null +++ b/conftest.py @@ -0,0 +1,7 @@ +def pytest_addoption(parser): + parser.addoption( + "--output-path", + action="store", + default=None, + help="Output directory to use for tests", + ) diff --git a/jupyter_output_monitor/_monitor.py b/jupyter_output_monitor/_monitor.py index 426c345..7e2d9c8 100644 --- a/jupyter_output_monitor/_monitor.py +++ b/jupyter_output_monitor/_monitor.py @@ -13,7 +13,7 @@ from playwright.sync_api import sync_playwright from ._server import jupyter_server -from ._utils import clear_notebook, isotime +from ._utils import clear_notebook, isotime, max_uint8_difference __all__ = ["monitor", "monitor_group"] @@ -50,8 +50,16 @@ def monitor_group(): default=10, help="Time in s to wait after executing each cell", ) +@click.option( + "--atol", + default=0, + help=( + "If an output image for a cell exists, a new image will only be written " + "out if the maximum uint8 difference between the two exceeds atol" + 
), +) @click.option("--headless", is_flag=True, help="Whether to run in headless mode") -def monitor(notebook, url, output, wait_after_execute, headless): +def monitor(notebook, url, output, wait_after_execute, atol, headless): if output is None: output = f"output-{iso_to_path(isotime())}" @@ -73,12 +81,12 @@ def monitor(notebook, url, output, wait_after_execute, headless): clear_notebook(notebook, os.path.join(notebook_dir, "notebook.ipynb")) with jupyter_server(notebook_dir) as server: url = server.base_url + "/lab/tree/notebook.ipynb" - _monitor_output(url, output, wait_after_execute, headless) + _monitor_output(url, output, wait_after_execute, atol, headless) else: - _monitor_output(url, output, wait_after_execute, headless) + _monitor_output(url, output, wait_after_execute, atol, headless) -def _monitor_output(url, output, wait_after_execute, headless): +def _monitor_output(url, output, wait_after_execute, atol, headless): # Index of the current last screenshot, by output index last_screenshot = {} @@ -129,13 +137,15 @@ def _monitor_output(url, output, wait_after_execute, headless): # Check if server is asking us to select a kernel dialogs = list(page.query_selector_all(".jp-Dialog-header")) for dialog in dialogs: - if 'Select Kernel' in dialog.inner_text(): + if "Select Kernel" in dialog.inner_text(): print("Server is asking to select a kernel, accepting default") accept = list(page.query_selector_all(".jp-mod-accept")) if len(accept) == 1: accept[0].click() else: - print("Error: multiple accept buttons found, not sure which to click") + print( + "Error: multiple accept buttons found, not sure which to click", + ) sys.exit(1) last_screenshot = {} @@ -222,25 +232,43 @@ def _monitor_output(url, output, wait_after_execute, headless): ): print(" -> change detected!") - timestamp = isotime() - - screenshot_filename = os.path.join( - output, - f"output-{output_index:03d}-{iso_to_path(timestamp)}.png", - ) - image = Image.open(BytesIO(screenshot_bytes)) - 
image.save(screenshot_filename) - - log.write( - f"{timestamp},output-changed,{output_index},{screenshot_filename}\n", - ) - log.flush() - - print( - f"Saving screenshot of output {output_index} at {timestamp}", - ) - - last_screenshot[output_index] = screenshot_bytes + if output_index in last_screenshot: + max_diff = max_uint8_difference( + last_screenshot[output_index], + screenshot_bytes, + ) + else: + max_diff = 256 + + if max_diff >= atol: + print( + f" -> maximum difference ({max_diff}) exceeds atol ({atol}), writing out image", + ) + + timestamp = isotime() + + screenshot_filename = os.path.join( + output, + f"output-{output_index:03d}-{iso_to_path(timestamp)}.png", + ) + image = Image.open(BytesIO(screenshot_bytes)) + image.save(screenshot_filename) + + log.write( + f"{timestamp},output-changed,{output_index},{screenshot_filename}\n", + ) + log.flush() + + print( + f"Saving screenshot of output {output_index} at {timestamp}", + ) + + last_screenshot[output_index] = screenshot_bytes + + else: + print( + f" -> maximum difference ({max_diff}) does not exceed atol ({atol}), skipping", + ) print("Stopping monitoring output and moving on to next input cell") diff --git a/jupyter_output_monitor/_utils.py b/jupyter_output_monitor/_utils.py index bc2888a..e8c7e54 100644 --- a/jupyter_output_monitor/_utils.py +++ b/jupyter_output_monitor/_utils.py @@ -1,10 +1,13 @@ import datetime +import io import socket +import numpy as np from nbconvert import NotebookExporter +from PIL import Image from traitlets.config import Config -__all__ = ["get_free_port", "clear_notebook", "isotime"] +__all__ = ["get_free_port", "clear_notebook", "isotime", "max_uint8_difference"] def get_free_port(): @@ -31,3 +34,25 @@ def clear_notebook(input_notebook, output_notebook): def isotime(): return datetime.datetime.now().isoformat() + + +def max_uint8_difference(image1_bytes, image2_bytes): + # Load images from bytes + image1 = Image.open(io.BytesIO(image1_bytes)).convert("RGB") + image2 =
Image.open(io.BytesIO(image2_bytes)).convert("RGB") + + # Convert images to numpy arrays + array1 = np.array(image1, dtype=np.uint8) + array2 = np.array(image2, dtype=np.uint8) + + # Ensure both images have the same dimensions + if array1.shape != array2.shape: + return 256 + + # Calculate the absolute difference + diff = np.abs(array1.astype(np.int16) - array2.astype(np.int16)) + + # Find the maximum difference + max_diff = np.max(diff) + + return max_diff diff --git a/jupyter_output_monitor/conftest.py b/jupyter_output_monitor/conftest.py new file mode 100644 index 0000000..735fdf8 --- /dev/null +++ b/jupyter_output_monitor/conftest.py @@ -0,0 +1,16 @@ +import pathlib +import tempfile + +import pytest + + +@pytest.fixture() +def output_path(request): + path_option = request.config.getoption("--output-path") + if path_option: + yield pathlib.Path(path_option) + else: + # Create a temporary directory if no path is specified + temp_dir = tempfile.TemporaryDirectory() + yield pathlib.Path(temp_dir.name) + temp_dir.cleanup() diff --git a/jupyter_output_monitor/tests/test_monitor.py b/jupyter_output_monitor/tests/test_monitor.py index 70f25b0..ed3d3c4 100644 --- a/jupyter_output_monitor/tests/test_monitor.py +++ b/jupyter_output_monitor/tests/test_monitor.py @@ -3,11 +3,18 @@ import sys from pathlib import Path +import pytest + DATA = Path(__file__).parent / "data" -def test_simple(tmp_path): - output_path = tmp_path / "output" +@pytest.mark.parametrize("threshold", [None, 2]) +def test_simple(output_path, threshold): + if threshold: + output_path = output_path / "simple_threshold" + else: + output_path = output_path / "simple" + extra = [] if threshold is None else ["--atol", str(threshold)] subprocess.run( [ sys.executable, @@ -19,6 +26,7 @@ def test_simple(tmp_path): "--output", str(output_path), "--headless", + *extra, ], check=True, ) @@ -29,18 +37,30 @@ def test_simple(tmp_path): assert len(list(output_path.glob("input-*.png"))) == 5 # Output screenshots - assert 
len(list(output_path.glob("output-*.png"))) == 4 + if threshold: + assert len(list(output_path.glob("output-*.png"))) in (4, 5) + else: + assert len(list(output_path.glob("output-*.png"))) >= 4 - # Specifically for cell with index 33 - assert len(list(output_path.glob("output-003-*.png"))) == 1 + # Specifically for cell with index 3 + if threshold: + assert len(list(output_path.glob("output-003-*.png"))) == 1 + else: + assert len(list(output_path.glob("output-003-*.png"))) >= 1 # Specifically for cell with index 33 - assert len(list(output_path.glob("output-033-*.png"))) == 3 + if threshold: + assert len(list(output_path.glob("output-033-*.png"))) in (3, 4) + else: + assert len(list(output_path.glob("output-033-*.png"))) >= 3 # Check that event log exists and is parsable with open(output_path / "event_log.csv") as f: reader = csv.reader(f, delimiter=",") - assert len(list(reader)) == 10 + if threshold: + assert len(list(reader)) in (10, 11) + else: + assert len(list(reader)) >= 10 subprocess.run( [ diff --git a/pyproject.toml b/pyproject.toml index 5d91608..e681f6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ lint.ignore = [ "D103", "D104", "C901", + "PLR0913", "PLR0915", "PLR2004", "DTZ", diff --git a/tox.ini b/tox.ini index f17fa68..8b797ce 100644 --- a/tox.ini +++ b/tox.ini @@ -11,4 +11,4 @@ extras = commands = pip freeze playwright install chromium - pytest --pyargs jupyter_output_monitor {posargs} + pytest --pyargs jupyter_output_monitor {posargs} --output-path {toxinidir}/output-{envname}