Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
aa7cdc6
draft workflow RDF
FynnBe Oct 26, 2022
7924b2f
update passthrough module generation
FynnBe Oct 26, 2022
825cad3
Tensor -> Arg; ArgType
FynnBe Oct 26, 2022
891093f
test examples
FynnBe Oct 26, 2022
8f06f9e
add schema validation
FynnBe Oct 26, 2022
6889bf8
add test_workflow_rdf.py
FynnBe Oct 26, 2022
9f0eed2
fix missing workflow import
FynnBe Oct 26, 2022
8639d8d
update generate_rdf_docs.py and generate_json_specs.py
FynnBe Oct 26, 2022
1476670
fix typing import
FynnBe Oct 26, 2022
fe9243e
test_steps and better workflow kwargs
FynnBe Oct 27, 2022
7afc2c6
Merge branch 'main' into workflow_rdf
FynnBe Oct 27, 2022
5645d4c
Update example_specs/workflows/hpa/single_cell_classification.yaml
FynnBe Oct 28, 2022
218b67a
wip discussion with constantin
FynnBe Oct 28, 2022
94d1292
wip2
FynnBe Oct 28, 2022
5831b04
Merge branch 'main' into workflow_rdf
FynnBe Oct 31, 2022
428d605
axes and options
FynnBe Oct 31, 2022
491e7b4
Merge branch 'main' into workflow_rdf
FynnBe Nov 3, 2022
6dacb68
update workflow RDF schema and raw_nodes
FynnBe Nov 3, 2022
9768848
finish first draft of workflow RDF spec
FynnBe Nov 3, 2022
e3d963e
inputs/options/outputs -> *_spec
FynnBe Nov 4, 2022
cd4bd4c
enforce unique step ids
FynnBe Nov 4, 2022
d894da9
detect type workflow
FynnBe Nov 4, 2022
7ace197
don't accept empty strings
FynnBe Nov 4, 2022
f5af22f
also log binarized
FynnBe Nov 4, 2022
96376e5
Merge branch 'main' into workflow_rdf
FynnBe Nov 8, 2022
9fe4ca2
wip remove wf steps
FynnBe Nov 24, 2022
31ecba9
rename importable sources
FynnBe Nov 24, 2022
ea1b826
black
FynnBe Nov 24, 2022
283da9a
update changelog
FynnBe Nov 24, 2022
73b31b9
remove steps from workflow spec
FynnBe Nov 24, 2022
9c4a81d
split up CallableSource field
FynnBe Nov 24, 2022
91e6783
set format_version as default
FynnBe Nov 24, 2022
8bdb9c9
prohibit serializing a list from a string
FynnBe Nov 24, 2022
a89bf07
remove specialized axes classes
FynnBe Nov 24, 2022
5670cbc
remove redundant brackets
FynnBe Nov 24, 2022
9b82e90
update workflow tests
FynnBe Nov 25, 2022
0243665
rename DEFAULT_TYPE_NAME_MAP
FynnBe Nov 28, 2022
91d141f
rename ArbitraryAxes to UnknownAxes
FynnBe Nov 28, 2022
a3d97c8
make nested_errors optional
FynnBe Nov 30, 2022
b7b51a9
assert for mypy
FynnBe Dec 6, 2022
eb4e3f8
some aliases for backward compatibility
FynnBe Dec 6, 2022
1102f6a
add AXIS_LETTER_TO_NAME and AXIS_NAME_TO_LETTER
FynnBe Dec 8, 2022
6780a70
Merge branch 'main' into workflow_rdf
FynnBe Feb 1, 2023
924d667
Merge branch 'main' into workflow_rdf
FynnBe Feb 9, 2023
b66798a
update hello workflow example
FynnBe Feb 9, 2023
c90cdd4
Merge branch 'main' into workflow_rdf
FynnBe Mar 3, 2023
ff5cc6e
Merge branch 'main' into workflow_rdf
FynnBe Mar 15, 2023
052c553
remove +\n from CLI help
FynnBe Mar 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bioimageio/spec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import collection, model, rdf, shared
from . import collection, model, rdf, shared, workflow
from .commands import update_format, update_rdf, validate
from .io_ import (
get_resource_package_content,
Expand Down
4 changes: 4 additions & 0 deletions bioimageio/spec/shared/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ def deserialize(self, value: typing.Any, attr: str = None, data: typing.Mapping[
return value


# Boolean field: combines `DocumentedField` with `marshmallow_fields.Boolean`
# and adds no members of its own.
class Boolean(DocumentedField, marshmallow_fields.Boolean):
    pass


class DateTime(DocumentedField, marshmallow_fields.DateTime):
"""
Parses datetime in ISO8601 or if value already has datetime.datetime type
Expand Down
14 changes: 14 additions & 0 deletions bioimageio/spec/workflow/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from . import v0_2

# autogen: start
from . import converters, raw_nodes, schema, utils
from .raw_nodes import FormatVersion

try:
from typing import get_args
except ImportError:
from typing_extensions import get_args # type: ignore

# Use the last type argument of `FormatVersion` as the package-level format version
# (presumably `FormatVersion` is a Literal listing versions with the newest last -- confirm).
format_version = get_args(FormatVersion)[-1]

# autogen: stop
3 changes: 3 additions & 0 deletions bioimageio/spec/workflow/converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Auto-generated by generate_passthrough_modules.py - do not modify

from .v0_2.converters import *
3 changes: 3 additions & 0 deletions bioimageio/spec/workflow/raw_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Auto-generated by generate_passthrough_modules.py - do not modify

from .v0_2.raw_nodes import *
3 changes: 3 additions & 0 deletions bioimageio/spec/workflow/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Auto-generated by generate_passthrough_modules.py - do not modify

from .v0_2.schema import *
3 changes: 3 additions & 0 deletions bioimageio/spec/workflow/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Auto-generated by generate_passthrough_modules.py - do not modify

from .v0_2.utils import *
9 changes: 9 additions & 0 deletions bioimageio/spec/workflow/v0_2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from . import converters, raw_nodes, schema, utils
from .raw_nodes import FormatVersion

try:
from typing import get_args
except ImportError:
from typing_extensions import get_args # type: ignore

# Use the last type argument of `FormatVersion` as this sub-package's format version
# (presumably `FormatVersion` is a Literal listing versions with the newest last -- confirm).
format_version = get_args(FormatVersion)[-1]
3 changes: 3 additions & 0 deletions bioimageio/spec/workflow/v0_2/converters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from bioimageio.spec.rdf.v0_2.converters import maybe_convert as maybe_convert_rdf

# Expose the general RDF 0.2 `maybe_convert` under this module's own name, unchanged.
maybe_convert = maybe_convert_rdf
64 changes: 64 additions & 0 deletions bioimageio/spec/workflow/v0_2/raw_nodes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
""" raw nodes for the workflow RDF spec

raw nodes are the deserialized equivalent to the content of any RDF.
serialization and deserialization are defined in schema:
RDF <--schema--> raw nodes
"""
import typing
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Union

from marshmallow import missing
from marshmallow.utils import _Missing

from bioimageio.spec.rdf.v0_2.raw_nodes import FormatVersion, RDF as _RDF, URI
from bioimageio.spec.shared.raw_nodes import RawNode

try:
from typing import Literal, get_args
except ImportError:
from typing_extensions import Literal, get_args # type: ignore

# Re-bind the imported name so it is an explicit attribute of this module.
FormatVersion = FormatVersion

# Type names an argument may declare.
ArgType = Literal[
    "tensor",
    "int",
    "float",
    "string",
    "boolean",
    "list",
    "dict",
    "any",
]

# Python types a default value may have.
DefaultType = Union[int, float, str, bool, list, dict, None]

# Python type -> type name.
# NOTE: the last key is the `None` object itself, not `type(None)`, so a lookup
# by `type(value)` never reaches the "null" entry.
TYPE_NAME_MAP = {
    int: "int",
    float: "float",
    str: "string",
    bool: "boolean",
    list: "list",
    dict: "dict",
    None: "null",
}


# Raw node describing one workflow argument (used for `Workflow.inputs` and `Workflow.outputs`).
# Every field defaults to marshmallow's `missing` sentinel.
@dataclass
class Arg(RawNode):
    name: str = missing
    type: ArgType = missing  # one of the `ArgType` type names
    default: Union[_Missing, DefaultType] = missing  # annotation explicitly allows `missing`
    description: Union[_Missing, str] = missing  # annotation explicitly allows `missing`


# Raw node describing one key word argument of a workflow (used for `Workflow.kwargs`).
# Every field defaults to marshmallow's `missing` sentinel.
@dataclass
class WorkflowKwarg(RawNode):
    name: str = missing
    type: ArgType = missing  # one of the `ArgType` type names
    default: DefaultType = missing  # unlike `Arg.default`, the annotation does not include `_Missing`
    description: Union[_Missing, str] = missing  # annotation explicitly allows `missing`


# Raw node describing one workflow step (used for `Workflow.steps` and `Workflow.test_steps`).
# Every field defaults to marshmallow's `missing` sentinel; only `op` has an
# annotation that does not include `_Missing`.
@dataclass
class Step(RawNode):
    id: Union[_Missing, str] = missing
    op: str = missing
    inputs: Union[_Missing, List[str]] = missing
    outputs: Union[_Missing, List[str]] = missing
    kwargs: Union[_Missing, Dict[str, Any]] = missing


# Raw node of a workflow RDF: extends the general RDF raw node (`_RDF`) with
# workflow-specific fields. Every field defaults to marshmallow's `missing` sentinel.
@dataclass
class Workflow(_RDF):
    type: Literal["workflow"] = missing

    # arguments consumed and produced by the workflow
    inputs: List[Arg] = missing
    outputs: List[Arg] = missing

    # steps of the workflow itself and of its test
    steps: List[Step] = missing
    test_steps: List[Step] = missing

    # annotation explicitly allows `missing`
    kwargs: Union[_Missing, List[WorkflowKwarg]] = missing
228 changes: 228 additions & 0 deletions bioimageio/spec/workflow/v0_2/schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
import typing

from marshmallow import ValidationError, missing, validates, validates_schema

from bioimageio.spec.rdf.v0_2.schema import RDF
from bioimageio.spec.shared import field_validators, fields
from bioimageio.spec.shared.schema import SharedBioImageIOSchema
from . import raw_nodes

try:
from typing import get_args
except ImportError:
from typing_extensions import get_args # type: ignore


# Base class for all schemas in this module: sets the `raw_nodes` class attribute
# to this package's `raw_nodes` module (presumably used by `SharedBioImageIOSchema`
# to find the raw node class matching a schema -- confirm).
class _BioImageIOSchema(SharedBioImageIOSchema):
    raw_nodes = raw_nodes


# Schema of a single workflow argument (used for workflow inputs and outputs):
# a required name and type, plus an optional default value and description.
class Arg(_BioImageIOSchema):
    name = fields.String(
        required=True,
        bioimageio_description="Argument/tensor name. No duplicates are allowed.",
    )
    type = fields.String(
        required=True,
        validate=field_validators.OneOf(get_args(raw_nodes.ArgType)),
        bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}",
    )
    default = fields.Raw(
        required=False,
        bioimageio_description="Default value compatible with type given by `type` field.",
        allow_none=True,
    )

    @validates_schema
    def default_has_compatible_type(self, data, **kwargs):
        """Check that a given `default` matches the declared `type`.

        A missing or `None` default and the type name "any" are always accepted.

        Raises:
            ValidationError: if the default's Python type is not listed in
                `raw_nodes.TYPE_NAME_MAP` or maps to a different type name than `type`.
        """
        default = data.get("default")
        if default is None:
            return

        arg_type_name = data.get("type")
        if arg_type_name == "any":
            return

        default_type = type(default)
        # `fields.Raw` lets any value through, so the type may be absent from the map;
        # report that as a validation error instead of letting a KeyError escape.
        type_name = raw_nodes.TYPE_NAME_MAP.get(default_type)
        if type_name is None:
            raise ValidationError(
                f"Default value of unsupported type {default_type} does not match type: {arg_type_name}"
            )

        if type_name != arg_type_name:
            raise ValidationError(
                f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}"
            )

    description = fields.String(bioimageio_description="Description of argument/tensor.")


# Schema of a single workflow key word argument: like an argument, but the
# `default` field is required (it may still be `None`).
class WorkflowKwarg(_BioImageIOSchema):
    name = fields.String(
        required=True,
        bioimageio_description="Key word argument name. No duplicates are allowed.",
    )
    type = fields.String(
        required=True,
        validate=field_validators.OneOf(get_args(raw_nodes.ArgType)),
        bioimageio_description=f"Argument type. One of: {get_args(raw_nodes.ArgType)}",
    )
    default = fields.Raw(
        required=True,
        bioimageio_description="Default value compatible with type given by `type` field.",
        allow_none=True,
    )

    @validates_schema
    def default_has_compatible_type(self, data, **kwargs):
        """Check that the `default` matches the declared `type`.

        A `None` default and the type name "any" are always accepted.

        Raises:
            ValidationError: if the default's Python type is not listed in
                `raw_nodes.TYPE_NAME_MAP` or maps to a different type name than `type`.
        """
        default = data.get("default")
        if default is None:
            return

        arg_type_name = data.get("type")
        if arg_type_name == "any":
            return

        default_type = type(default)
        # `fields.Raw` lets any value through, so the type may be absent from the map;
        # report that as a validation error instead of letting a KeyError escape.
        type_name = raw_nodes.TYPE_NAME_MAP.get(default_type)
        if type_name is None:
            raise ValidationError(
                f"Default value of unsupported type {default_type} does not match type: {arg_type_name}"
            )

        if type_name != arg_type_name:
            raise ValidationError(
                f"Default value of type {default_type} (type name: {type_name}) does not match type: {arg_type_name}"
            )

    description = fields.String(required=False, bioimageio_description="Description of key word argument.")


# Schema of one workflow step: a required operation name `op` plus optional
# `id`, `inputs`, `outputs` and `kwargs`.
class Step(_BioImageIOSchema):
    id = fields.String(
        required=False,
        # presumably passes only if `value.isidentifier()` is truthy -- confirm against field_validators.Predicate
        validate=field_validators.Predicate("isidentifier"),
        bioimageio_description="Step id for referencing the steps' kwargs or outputs.",
    )
    op = fields.String(
        required=True,
        validate=field_validators.Predicate("isidentifier"),
        bioimageio_description="Name of operation. Must be implemented in bioimageio.core or bioimageio.contrib.",
    )
    # input references are plain strings; no per-item validator is attached here
    inputs = fields.List(
        fields.String(
            bioimageio_description="named output of a previous step with the pattern '<step id>.outputs.<output name>'",
        ),
        required=False,
    )
    # each output name gets the same "isidentifier" validator as `id` and `op`
    outputs = fields.List(
        fields.String(
            validate=field_validators.Predicate("isidentifier"),
        ),
        bioimageio_description="output names for this step",
        required=False,
    )
    kwargs = fields.Kwargs(bioimageio_description="Key word arguments for op.")


def _check_step_input_references(steps, references, label):
    """Walk `steps` in order and reject inputs that reference nothing known so far.

    Args:
        steps: step raw nodes in the order they are executed.
        references: set of reference strings valid before the first step. It is
            updated in place with '<step id>.outputs.<output name>' for every
            output of every step, so later steps may consume earlier outputs.
        label: wording used in the error message ("step" or "test step").

    Raises:
        ValidationError: on the first step input that is not in `references`.
    """
    for step in steps:
        # `step.inputs`/`step.outputs` may be the falsy `missing` sentinel
        for ref in step.inputs or ():
            if ref not in references:
                raise ValidationError(f"Invalid {label} input reference '{ref}'")

        if step.outputs:
            references.update({f"{step.id}.outputs.{out}" for out in step.outputs})


# Schema of the workflow RDF: the general RDF schema extended by workflow
# inputs, outputs, key word arguments, steps and test steps, together with the
# cross-field checks (unique names, resolvable step input references).
class Workflow(_BioImageIOSchema, RDF):
    bioimageio_description = f"""# BioImage.IO Workflow Resource Description File {get_args(raw_nodes.FormatVersion)[-1]}
This specification defines the fields used in a BioImage.IO-compliant resource description file (`RDF`) for describing workflows.
These fields are typically stored in a YAML file which we call Workflow Resource Description File or `workflow RDF`.

The workflow RDF YAML file contains mandatory and optional fields. In the following description, optional fields are indicated by _optional_.
_optional*_ with an asterisk indicates the field is optional depending on the value in another field.
"""
    inputs = fields.List(
        fields.Nested(Arg()),
        validate=field_validators.Length(min=1),
        required=True,
        bioimageio_description="Describes the inputs expected by this workflow.",
    )

    @validates("inputs")
    def no_duplicate_input_names(self, value: typing.List[raw_nodes.Arg]):
        """Reject `inputs` in which two arguments share a name."""
        if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value):
            raise ValidationError("Could not check for duplicate input names due to another validation error.")

        names = [t.name for t in value]
        if len(names) > len(set(names)):
            raise ValidationError("Duplicate input names are not allowed.")

    outputs = fields.List(
        fields.Nested(Arg()),
        validate=field_validators.Length(min=1),
        bioimageio_description="Describes the outputs from this workflow.",
    )

    @validates("outputs")
    def no_duplicate_output_names(self, value: typing.List[raw_nodes.Arg]):
        """Reject `outputs` in which two arguments share a name."""
        if not isinstance(value, list) or not all(isinstance(v, raw_nodes.Arg) for v in value):
            raise ValidationError("Could not check for duplicate output names due to another validation error.")

        # every item is a raw_nodes.Arg at this point, so plain attribute access is enough
        names = [t.name for t in value]
        if len(names) > len(set(names)):
            raise ValidationError("Duplicate output names are not allowed.")

    @validates_schema
    def inputs_and_outputs(self, data, **kwargs):
        """Reject a name that is used by more than one input/output argument.

        Raises:
            ValidationError: if `inputs` or `outputs` is not a list of `raw_nodes.Arg`
                (including an absent `outputs`, which is not declared as required),
                or if a name occurs more than once across both lists.
        """
        ipts = data.get("inputs")
        outs = data.get("outputs")
        # The checks must short-circuit: iterating `ipts`/`outs` is only safe once
        # both are known to be lists (a missing field is `None` here).
        if (
            not isinstance(ipts, list)
            or not isinstance(outs, list)
            or not all(isinstance(v, raw_nodes.Arg) for v in ipts)
            or not all(isinstance(v, raw_nodes.Arg) for v in outs)
        ):
            raise ValidationError("Could not check for duplicate names due to another validation error.")

        # no duplicate names
        names = [t.name for t in ipts + outs]
        if len(names) > len(set(names)):
            raise ValidationError("Duplicate names are not allowed.")

    kwargs = fields.List(
        fields.Nested(WorkflowKwarg()),
        required=False,
        bioimageio_description="Key word arguments for this workflow.",
    )

    steps = fields.List(
        fields.Nested(Step()),
        validate=field_validators.Length(min=1),
        required=True,
        bioimageio_description="Workflow steps to be executed consecutively.",
    )

    @validates_schema
    def step_input_references_exist(self, data, **kwargs):
        """Check that every step input refers to a workflow input or an earlier step output.

        Valid references are 'inputs.<input name>' and, for steps already passed,
        '<step id>.outputs.<output name>'.

        Raises:
            ValidationError: if `inputs` or `steps` is missing/invalid or a step
                input cannot be resolved.
        """
        inputs = data.get("inputs")
        if not inputs or not isinstance(inputs, list) or not all(isinstance(ipt, raw_nodes.Arg) for ipt in inputs):
            raise ValidationError("Missing/invalid 'inputs'")

        steps = data.get("steps")
        # check every step, not only the first one, before accessing step attributes
        if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps):
            raise ValidationError("Missing/invalid 'steps'")

        _check_step_input_references(steps, {f"inputs.{ipt.name}" for ipt in inputs}, "step")

    test_steps = fields.List(
        fields.Nested(Step()),
        validate=field_validators.Length(min=1),
        required=True,
        bioimageio_description="Test steps to be executed consecutively.",
    )

    @validates_schema
    def test_step_input_references_exist(self, data, **kwargs):
        """Check that every test step input refers to an output of an earlier test step.

        Unlike for `steps`, the workflow inputs are not valid references here.

        Raises:
            ValidationError: if `test_steps` is missing/invalid or a test step
                input cannot be resolved.
        """
        steps = data.get("test_steps")
        # check every step, not only the first one, before accessing step attributes
        if not steps or not isinstance(steps, list) or not all(isinstance(s, raw_nodes.Step) for s in steps):
            raise ValidationError("Missing/invalid 'test_steps'")

        _check_step_input_references(steps, set(), "test step")
5 changes: 5 additions & 0 deletions bioimageio/spec/workflow/v0_2/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from . import raw_nodes


def filter_resource_description(raw_rd: raw_nodes.Workflow) -> raw_nodes.Workflow:
    """Return the given workflow raw node unchanged (nothing is filtered)."""
    return raw_rd
Loading