17 changes: 16 additions & 1 deletion tests/test_components/autograd/test_autograd.py
@@ -25,12 +25,12 @@
MINIMUM_SPACING_FRACTION,
)
from tidy3d.components.autograd.derivative_utils import DerivativeInfo
from tidy3d.components.autograd.field_map import FieldMap
from tidy3d.components.autograd.utils import is_tidy_box
from tidy3d.components.data.data_array import DataArray
from tidy3d.exceptions import AdjointError
from tidy3d.plugins.polyslab import ComplexPolySlab
from tidy3d.web import run, run_async
from tidy3d.web.api.autograd.utils import FieldMap

from ...utils import SIM_FULL, AssertLogLevel, run_emulated, tracer_arr

@@ -1174,6 +1174,21 @@ def objective(*params):
ag.grad(objective)(params0)


def test_sim_hash_changes_with_traced_keys():
"""Ensure the model hash accounts for autograd traced paths."""

sim_traced = SIM_FULL.copy()
original_field_map = sim_traced._strip_traced_fields()

structures = list(sim_traced.structures)
structures[0] = structures[0].to_static()
sim_modified = sim_traced.updated_copy(structures=tuple(structures))

modified_field_map = sim_modified._strip_traced_fields()
assert original_field_map != modified_field_map
assert sim_traced._hash_self() != sim_modified._hash_self()


def test_sim_traced_override_structures():
"""Make sure that sims with traced override structures are handled properly."""

76 changes: 76 additions & 0 deletions tidy3d/components/autograd/field_map.py
@@ -0,0 +1,76 @@
"""Typed containers for autograd traced field metadata."""

from __future__ import annotations

import json
from typing import Any, Callable

import pydantic.v1 as pydantic

from tidy3d.components.autograd.types import AutogradFieldMap, dict_ag
from tidy3d.components.base import Tidy3dBaseModel
from tidy3d.components.types import ArrayLike, tidycomplex


class Tracer(Tidy3dBaseModel):
"""Representation of a single traced element within a model."""

path: tuple[Any, ...] = pydantic.Field(
...,
title="Path to the traced object in the model dictionary.",
)
data: float | tidycomplex | ArrayLike = pydantic.Field(..., title="Tracing data")


class FieldMap(Tidy3dBaseModel):
"""Collection of traced elements."""

tracers: tuple[Tracer, ...] = pydantic.Field(
...,
title="Collection of Tracers.",
)

@property
def to_autograd_field_map(self) -> AutogradFieldMap:
"""Convert to ``AutogradFieldMap`` autograd dictionary."""
return dict_ag({tracer.path: tracer.data for tracer in self.tracers})

@classmethod
def from_autograd_field_map(cls, autograd_field_map: AutogradFieldMap) -> FieldMap:
"""Initialize from an ``AutogradFieldMap`` autograd dictionary."""
tracers = []
for path, data in autograd_field_map.items():
tracers.append(Tracer(path=path, data=data))
return cls(tracers=tuple(tracers))


def _encoded_path(path: tuple[Any, ...]) -> str:
"""Return a stable JSON representation for a traced path."""
return json.dumps(list(path), separators=(",", ":"), ensure_ascii=True)


class TracerKeys(Tidy3dBaseModel):
"""Collection of traced field paths."""

keys: tuple[tuple[Any, ...], ...] = pydantic.Field(
...,
title="Collection of tracer keys.",
)

def encoded_keys(self) -> list[str]:
"""Return the JSON-encoded representation of keys."""
return [_encoded_path(path) for path in self.keys]

@classmethod
def from_field_mapping(
cls,
field_mapping: AutogradFieldMap,
*,
sort_key: Callable[[tuple[Any, ...]], str] | None = None,
) -> TracerKeys:
"""Construct keys from an autograd field mapping."""
if sort_key is None:
sort_key = _encoded_path

sorted_paths = tuple(sorted(field_mapping.keys(), key=sort_key))
return cls(keys=sorted_paths)
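
For context, a minimal standalone usage sketch of these new containers (not part of the diff; the example paths and values are hypothetical):

# Standalone sketch; example paths/values are made up for illustration.
from tidy3d.components.autograd.field_map import FieldMap, TracerKeys

field_mapping = {
    ("structures", 0, "geometry", "size"): 1.5,
    ("structures", 1, "medium", "permittivity"): 2.25,
}

# FieldMap stores (path, data) pairs and round-trips back to the mapping
field_map = FieldMap.from_autograd_field_map(field_mapping)
assert dict(field_map.to_autograd_field_map) == field_mapping

# TracerKeys keeps only the paths, sorted by their canonical JSON encoding,
# so mappings with the same paths serialize identically regardless of
# dict insertion order
keys = TracerKeys.from_field_mapping(field_mapping)
print(keys.encoded_keys())
# ['["structures",0,"geometry","size"]', '["structures",1,"medium","permittivity"]']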
17 changes: 17 additions & 0 deletions tidy3d/components/base.py
@@ -38,6 +38,7 @@
# If json string is larger than ``MAX_STRING_LENGTH``, split the string when storing in hdf5
MAX_STRING_LENGTH = 1_000_000_000
FORBID_SPECIAL_CHARACTERS = ["/"]
TRACED_FIELD_KEYS_ATTR = "__tidy3d_traced_field_keys__"


def cache(prop):
@@ -767,6 +768,9 @@ def add_data_to_file(data_dict: dict, group_path: str = "") -> None:
add_data_to_file(data_dict=value, group_path=subpath)

add_data_to_file(data_dict=self.dict())
traced_keys_payload = self._serialized_traced_field_keys()
if traced_keys_payload:
f_handle.attrs[TRACED_FIELD_KEYS_ATTR] = traced_keys_payload
Collaborator:

Wondering if it makes a bit more sense to use a new dataset (like we do for the simulation json) rather than the attrs directly. Feels like a bit of an abuse of what HDF5 attrs are meant for.

For the simulation json, I remember Tyler first tried putting it in attrs too, but it can be a problem when it becomes large. Here it's unlikely that the json will be very big, but still potentially better to just make a dataset?

Collaborator (author):

I would actually argue that this is metadata and exactly the kind of thing that should go in HDF5 attrs 😄 - it's a short list of paths, it never grows beyond a couple of bytes, it never needs random access, and HDF5 attrs are intended for exactly that style of short descriptive information. I think it's different from the simulation JSON, because that really isn't metadata and, as you said, is potentially quite large.

Collaborator:

Alright, yeah, fine by me.

Collaborator:

Hm, but I also feel uneasy about this. What is the disadvantage to just making a dataset? I feel it makes things more explicit that this is data we care about. It's not purely metadata.

Collaborator (author):

It's a fair point; maybe the line between metadata and model data is a bit fuzzy here. I lean toward calling it metadata, because it's a deterministic byproduct of the actual model and the user inputs: if the simulation is rebuilt from scratch, the same traced paths will be "rediscovered", so it's closer to a property than to part of the model.
Including it as a dataset might also require some additional plumbing for the HDF5 reader/writer (haven't checked), whereas attrs don't show up in the JSON reconstruction. That being said, the difference is mostly semantic 🤷

Collaborator:

OK, I asked ChatGPT and it agrees with Yannick :D, so I'm fine with the current implementation:

Short answer: use HDF5 attributes if the new info is small (a few strings/ints/bools), describes the run, and you'll read it whole. Use a dataset only if it's large, tabular/array-like, needs chunking/compression, or will be queried/sliced. This info is metadata (provenance/config that affects processing, not the simulation arrays themselves).

Why attrs fit here:
- Attributes are meant for descriptive/run-level info (schema versions, flags, tool versions).
- Cheap to read, no chunking needed, and they live with the object they describe (usually the root group).
- Keep them small; if they grow or become structured, promote to a dataset or a small /cache_key group.

Cache/hash tips (to avoid fragile keys):
- Scope the key: hash a defined subset only (inputs + this new flag/signature + relevant versions), not the whole file.
- Canonicalization: store under /cache_key (either as attributes or tiny datasets). When building the hash, read the keys, sort by name, encode deterministically (e.g., canonical JSON with sorted keys / UTF-8, or CBOR), and hash.
- Versioning: include cache_key_version and producer/tool version fields; bump them when semantics change.
- Defaults for back-compat: treat a missing field as an explicit default (e.g., "backend_signature":"v0"), so old files don't slip through incorrectly.
- Auditability: also store the computed cache_key back into the file (as an attr) so you can explain cache hits later.

Rule of thumb:
- Attrs: small, descriptive, per-run constants.
- Dataset: big/structured, per-step/per-record data, or anything you might slice/compress.

Given your description, I'd put the new field(s) as root attributes (or attributes under /cache_key) and include them in the deterministic hash. Yes, consider it metadata.
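
For concreteness, a standalone h5py sketch (not tidy3d code; the file name and payload are hypothetical) contrasting the two storage options discussed in this thread:

import json

import h5py

# hypothetical payload: the JSON-encoded traced paths
payload = json.dumps([["structures", 0, "geometry", "size"]], separators=(",", ":"))

with h5py.File("example.hdf5", "w") as f:
    # option taken in this PR: short descriptive metadata as a root attribute
    f.attrs["__tidy3d_traced_field_keys__"] = payload
    # alternative raised in review: a dedicated string dataset (like the
    # simulation json), better suited if the payload could ever grow large
    f.create_dataset("traced_field_keys", data=payload)

with h5py.File("example.hdf5", "r") as f:
    assert f.attrs["__tidy3d_traced_field_keys__"] == payload
    # h5py returns bytes for string datasets by default
    assert f["traced_field_keys"][()].decode() == payload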


@classmethod
def dict_from_hdf5_gz(
@@ -1054,6 +1058,19 @@ def insert_value(x, path: tuple[str, ...], sub_dict: dict):

return self.parse_obj(self_dict)

def _serialized_traced_field_keys(self) -> Optional[str]:
"""Return a serialized, order-independent representation of traced field paths."""

field_mapping = self._strip_traced_fields()
if not field_mapping:
return None

# TODO: remove this deferred import once TracerKeys is decoupled from Tidy3dBaseModel.
from tidy3d.components.autograd.field_map import TracerKeys

tracer_keys = TracerKeys.from_field_mapping(field_mapping)
return tracer_keys.json(separators=(",", ":"), ensure_ascii=True)

def to_static(self) -> Tidy3dBaseModel:
"""Version of object with all autograd-traced fields removed."""

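To illustrate the order-independence this method provides, a simplified standalone sketch (it approximates, but does not exactly reproduce, the TracerKeys JSON payload):

import json

def encoded_path(path: tuple) -> str:
    # same canonical encoding as _encoded_path in field_map.py
    return json.dumps(list(path), separators=(",", ":"), ensure_ascii=True)

def serialized_keys(field_mapping: dict) -> str:
    # sort paths by their canonical encoding before serializing, so the
    # result cannot depend on dict insertion order
    ordered = sorted(field_mapping, key=encoded_path)
    return json.dumps([list(path) for path in ordered], separators=(",", ":"))

a = {("structures", 0, "geometry", "size"): 1.0, ("monitors", 0, "freqs"): 2.0}
b = dict(reversed(list(a.items())))  # same paths, reversed insertion order
assert serialized_keys(a) == serialized_keys(b)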
2 changes: 1 addition & 1 deletion tidy3d/web/api/autograd/io_utils.py
@@ -4,10 +4,10 @@
import tempfile

import tidy3d as td
from tidy3d.components.autograd.field_map import FieldMap, TracerKeys
from tidy3d.web.core.s3utils import download_file, upload_file # type: ignore

from .constants import SIM_FIELDS_KEYS_FILE, SIM_VJP_FILE
from .utils import FieldMap, TracerKeys


def upload_sim_fields_keys(sim_fields_keys: list[tuple], task_id: str, verbose: bool = False):
47 changes: 0 additions & 47 deletions tidy3d/web/api/autograd/utils.py
@@ -4,12 +4,8 @@
import typing

import numpy as np
import pydantic as pd

import tidy3d as td
from tidy3d.components.autograd.types import AutogradFieldMap, dict_ag
from tidy3d.components.base import Tidy3dBaseModel
from tidy3d.components.types import ArrayLike, tidycomplex

""" E and D field gradient map calculation helpers. """

@@ -79,46 +75,3 @@ def get_field_key(dim: str, fld_data: typing.Union[td.FieldData, td.Permittivity
mult = cmp_1 * cmp_2
field_components[key_1] = mult
return fld_1.updated_copy(**field_components)


class Tracer(Tidy3dBaseModel):
"""Class to store a single traced field."""

path: tuple[typing.Any, ...] = pd.Field(
...,
title="Path to the traced object in the model dictionary.",
)

data: typing.Union[float, tidycomplex, ArrayLike] = pd.Field(..., title="Tracing data")


class FieldMap(Tidy3dBaseModel):
"""Class to store a collection of traced fields."""

tracers: tuple[Tracer, ...] = pd.Field(
...,
title="Collection of Tracers.",
)

@property
def to_autograd_field_map(self) -> AutogradFieldMap:
"""Convert to ``AutogradFieldMap`` autograd dictionary."""
return dict_ag({tracer.path: tracer.data for tracer in self.tracers})

@classmethod
def from_autograd_field_map(cls, autograd_field_map) -> FieldMap:
"""Initialize from an ``AutogradFieldMap`` autograd dictionary."""
tracers = []
for path, data in autograd_field_map.items():
tracers.append(Tracer(path=path, data=data))

return cls(tracers=tuple(tracers))


class TracerKeys(Tidy3dBaseModel):
"""Class to store a collection of tracer keys."""

keys: tuple[tuple[typing.Any, ...], ...] = pd.Field(
...,
title="Collection of tracer keys.",
)