Skip to content

Commit b00c2bf

Browse files
committed
upload as image
Signed-off-by: Isabella do Amaral <[email protected]>
1 parent 91c65b6 commit b00c2bf

File tree

10 files changed

+144
-53
lines changed

10 files changed

+144
-53
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ test:
2525

2626
.PHONY: test-e2e
2727
test-e2e:
28-
poetry run pytest --e2e -s -x -rA
28+
poetry run pytest --e2e -s -x -rA -v
2929

3030
.PHONY: test-e2e-model-registry
3131
test-e2e-model-registry:

e2e/test_cli.sh

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22

3-
SCRIPT_DIR="$(dirname "$(realpath "$BASH_SOURCE")")"
43
set -e
54

65
echo "Preparing venv ..."
@@ -15,17 +14,19 @@ echo "Running E2E test for CLI ..."
1514
omlmd push localhost:5001/mmortari/mlartifact:v1 README.md --empty-metadata --plain-http
1615
omlmd push localhost:5001/mmortari/mlartifact:v1 README.md --metadata tests/data/md.json --plain-http
1716

18-
omlmd pull localhost:5001/mmortari/mlartifact:v1 -o tmp/a --plain-http
19-
file_count=$(find "tmp/a" -type f | wc -l)
20-
if [ "$file_count" -eq 3 ]; then
21-
echo "Expected 3 files in $DIR, ok."
17+
DIR="tmp/a"
18+
omlmd pull localhost:5001/mmortari/mlartifact:v1 -o "$DIR" --plain-http
19+
file_count=$(find "$DIR" -type f | wc -l)
20+
if [ "$file_count" -eq 2 ]; then
21+
echo "Expected 2 files in $DIR, ok."
2222
else
23-
echo "I was expecting 3 files in $DIR, FAIL."
23+
echo "I was expecting 2 files in $DIR, FAIL."
2424
exit 1
2525
fi
2626

27-
omlmd pull localhost:5001/mmortari/mlartifact:v1 -o tmp/b --media-types "application/x-mlmodel" --plain-http
28-
file_count=$(find "tmp/b" -type f | wc -l)
27+
DIR="tmp/b"
28+
omlmd pull localhost:5001/mmortari/mlartifact:v1 -o "$DIR" --media-types "application/x-mlmodel" --plain-http
29+
file_count=$(find "$DIR" -type f | wc -l)
2930
if [ "$file_count" -eq 1 ]; then
3031
echo "Expected 1 files in $DIR, ok."
3132
else
@@ -38,7 +39,7 @@ omlmd crawl localhost:5001/mmortari/mlartifact:v1 localhost:5001/mmortari/mlarti
3839
omlmd crawl --plain-http \
3940
localhost:5001/mmortari/mlartifact:v1 \
4041
localhost:5001/mmortari/mlartifact:v1 \
41-
localhost:5001/mmortari/mlartifact:v1 \
42-
| jq "max_by(.config.customProperties.accuracy).reference"
42+
localhost:5001/mmortari/mlartifact:v1 |
43+
jq "max_by(.config.customProperties.accuracy).reference"
4344

4445
deactivate

omlmd/cli.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,11 @@ def crawl(plain_http: bool, targets: tuple[str]):
7373
required=True,
7474
type=click.Path(path_type=Path, exists=True, resolve_path=True),
7575
)
76+
@click.option(
77+
"--as-artifact",
78+
is_flag=True,
79+
help="Push as an artifact (default is as a blob)",
80+
)
7681
@cloup.option_group(
7782
"Metadata options",
7883
cloup.option(
@@ -88,6 +93,7 @@ def push(
8893
plain_http: bool,
8994
target: str,
9095
path: Path,
96+
as_artifact: bool,
9197
metadata: Path | None,
9298
empty_metadata: bool,
9399
):
@@ -96,4 +102,6 @@ def push(
96102
if empty_metadata:
97103
logger.warning(f"Pushing to {target} with empty metadata.")
98104
md = deserialize_mdfile(metadata) if metadata else {}
99-
click.echo(Helper.from_default_registry(plain_http).push(target, path, **md))
105+
click.echo(
106+
Helper.from_default_registry(plain_http).push(target, path, as_artifact, **md)
107+
)

omlmd/constants.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
from oras.defaults import default_blob_media_type
2+
13
FILENAME_METADATA_JSON = "model_metadata.omlmd.json"
2-
MIME_APPLICATION_CONFIG = "application/x-config"
34
MIME_APPLICATION_MLMODEL = "application/x-mlmodel"
5+
MIME_APPLICATION_MLMETADATA = "application/x-mlmetadata+json"
6+
MIME_BLOB = default_blob_media_type
7+
MIME_MANIFEST_CONFIG = "application/vnd.oci.image.config.v1+json"

omlmd/helpers.py

Lines changed: 70 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
from __future__ import annotations
22

3+
import json
34
import logging
45
import os
6+
import platform
7+
import tarfile
58
import urllib.request
69
from collections.abc import Sequence
710
from dataclasses import dataclass, field
@@ -10,8 +13,10 @@
1013

1114
from .constants import (
1215
FILENAME_METADATA_JSON,
13-
MIME_APPLICATION_CONFIG,
16+
MIME_APPLICATION_MLMETADATA,
1417
MIME_APPLICATION_MLMODEL,
18+
MIME_BLOB,
19+
MIME_MANIFEST_CONFIG,
1520
)
1621
from .listener import Event, Listener, PushEvent
1722
from .model_metadata import ModelMetadata
@@ -20,6 +25,18 @@
2025
logger = logging.getLogger(__name__)
2126

2227

28+
def get_arch() -> str:
29+
mac = platform.machine()
30+
if mac == "x86_64":
31+
return "amd64"
32+
if mac == "arm64":
33+
return "arm64"
34+
if mac == "aarch64":
35+
return "arm64"
36+
msg = f"Unsupported architecture: {platform.machine()}"
37+
raise NotImplementedError(msg)
38+
39+
2340
def download_file(uri: str):
2441
file_name = os.path.basename(uri)
2542
urllib.request.urlretrieve(uri, file_name)
@@ -41,6 +58,7 @@ def push(
4158
self,
4259
target: str,
4360
path: Path | str,
61+
as_artifact: bool = False,
4462
**kwargs,
4563
):
4664
owns_meta = True
@@ -52,8 +70,7 @@ def push(
5270
owns_meta = False
5371
logger.warning("Reusing intermediate metadata files.")
5472
logger.debug(f"{meta_path}")
55-
with open(meta_path, "r") as f:
56-
model_metadata = ModelMetadata.from_json(f.read())
73+
model_metadata = ModelMetadata.from_dict(json.loads(meta_path.read_bytes()))
5774
elif meta_path.exists():
5875
err = dedent(f"""
5976
OMLMD intermediate metadata files found at '{meta_path}'.
@@ -65,13 +82,51 @@ def push(
6582
raise RuntimeError(err)
6683
else:
6784
model_metadata = ModelMetadata.from_dict(kwargs)
68-
meta_path.write_text(model_metadata.to_json())
85+
meta_path.write_text(json.dumps(model_metadata.to_dict()))
86+
87+
owns_model_tar = False
88+
owns_md_tar = False
89+
manifest_path = path.parent / "manifest.json"
90+
model_tar = None
91+
meta_tar = None
92+
if not as_artifact:
93+
manifest_path.write_text(
94+
json.dumps(
95+
{
96+
"architecture": get_arch(),
97+
"os": "linux",
98+
}
99+
)
100+
)
101+
config = f"{manifest_path}:{MIME_MANIFEST_CONFIG}"
102+
model_tar = path.parent / f"{path.stem}.tar"
103+
meta_tar = path.parent / f"{meta_path.stem}.tar"
104+
if not model_tar.exists():
105+
owns_model_tar = True
106+
with tarfile.open(model_tar, "w") as tf:
107+
tf.add(path, arcname=path.name)
108+
if not meta_tar.exists():
109+
owns_md_tar = True
110+
with tarfile.open(meta_tar, "w:gz") as tf:
111+
tf.add(meta_path, arcname=meta_path.name)
112+
files = [
113+
f"{model_tar}:{MIME_BLOB}",
114+
f"{meta_tar}:{MIME_BLOB}+gzip",
115+
]
116+
else:
117+
manifest_path.write_text(
118+
json.dumps(
119+
{
120+
"artifactType": MIME_APPLICATION_MLMODEL,
121+
}
122+
)
123+
)
124+
config = f"{manifest_path}:{MIME_APPLICATION_MLMODEL}"
125+
files = [
126+
f"{path}:{MIME_APPLICATION_MLMODEL}",
127+
f"{meta_path}:{MIME_APPLICATION_MLMETADATA}",
128+
]
69129

70-
config = f"{meta_path}:{MIME_APPLICATION_CONFIG}"
71-
files = [
72-
f"{path}:{MIME_APPLICATION_MLMODEL}",
73-
config,
74-
]
75130
try:
76131
# print(target, files, model_metadata.to_annotations_dict())
77132
result = self._registry.push(
@@ -88,6 +143,12 @@ def push(
88143
finally:
89144
if owns_meta:
90145
meta_path.unlink()
146+
if owns_model_tar:
147+
assert isinstance(model_tar, Path)
148+
model_tar.unlink()
149+
if owns_md_tar:
150+
assert isinstance(meta_tar, Path)
151+
meta_tar.unlink()
91152

92153
def pull(
93154
self, target: str, outdir: Path | str, media_types: Sequence[str] | None = None

omlmd/listener.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@
1010

1111

1212
class Listener(ABC):
13-
"""
14-
TODO: not yet settled for multi-method or current single update method.
15-
"""
13+
# TODO: undecided between a multi-method interface and the current single `update` method.
1614

1715
@abstractmethod
1816
def update(self, source: t.Any, event: Event) -> None:

omlmd/model_metadata.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ class ModelMetadata:
1717
model_format_name: str | None = None
1818
model_format_version: str | None = None
1919

20-
def to_json(self) -> str:
21-
return json.dumps(self.to_dict(), indent=4)
22-
2320
def to_dict(self) -> dict[str, t.Any]:
2421
return asdict(self)
2522

@@ -38,16 +35,6 @@ def to_annotations_dict(self) -> dict[str, str]:
3835
) # post-fix "+json" for OCI annotation which is a str representing a json
3936
return result
4037

41-
@staticmethod
42-
def from_json(json_str: str) -> "ModelMetadata":
43-
data = json.loads(json_str)
44-
return ModelMetadata(**data)
45-
46-
@staticmethod
47-
def from_yaml(yaml_str: str) -> "ModelMetadata":
48-
data = yaml.safe_load(yaml_str)
49-
return ModelMetadata(**data)
50-
5138
@staticmethod
5239
def from_dict(data: dict[str, t.Any]) -> "ModelMetadata":
5340
known_keys = {f.name for f in fields(ModelMetadata)}

tests/test_e2e_model_registry.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,9 @@ def update(self, source: Helper, event: Event) -> None:
7777
assert mv
7878
assert mv.description == "Lorem ipsum"
7979
assert mv.author == "John Doe"
80-
assert mv.custom_properties == {"accuracy": 0.987}
80+
assert mv.custom_properties == {
81+
"accuracy": accuracy_value,
82+
}
8183

8284
ma = model_registry.get_model_artifact("mnist", v)
8385
assert ma

tests/test_helpers.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,27 @@
1+
import io
12
import json
23
import subprocess
4+
import tarfile
35
import tempfile
46
import typing as t
57
from hashlib import sha256
68
from pathlib import Path
79

810
import pytest
911

10-
from omlmd.constants import MIME_APPLICATION_MLMODEL
12+
from omlmd.constants import MIME_BLOB
1113
from omlmd.helpers import Helper
1214
from omlmd.listener import Event, Listener
1315
from omlmd.model_metadata import ModelMetadata, deserialize_mdfile
1416
from omlmd.provider import OMLMDRegistry
1517

1618

19+
def untar(tar: Path, out: Path):
    """Extract the member named after the archive's stem from *tar* into *out*.

    Args:
        tar: Path to the tar archive; the member looked up is ``tar.stem``.
        out: Destination file that receives the member's bytes.

    Raises:
        KeyError: If the archive has no member named ``tar.stem``.
        ValueError: If that member is not a regular file (nothing to read).
    """
    # Context managers close the archive and the member stream even when
    # reading fails; the previous version leaked the open TarFile.
    with tarfile.open(tar, "r") as archive:
        member = archive.extractfile(tar.stem)
        if member is None:
            msg = f"Member '{tar.stem}' in '{tar}' is not a regular file"
            raise ValueError(msg)
        with member:
            out.write_bytes(member.read())
23+
24+
1725
def test_call_push_using_md_from_file(mocker):
1826
helper = Helper()
1927
mocker.patch.object(helper, "push", return_value=None)
@@ -100,12 +108,33 @@ def test_push_pull_chunked(tmp_path, target):
100108

101109
omlmd.push(target, temp, **md)
102110
omlmd.pull(target, tmp_path)
103-
assert len(list(tmp_path.iterdir())) == 3
104-
assert tmp_path.joinpath(temp.name).stat().st_size == base_size
111+
files = list(tmp_path.iterdir())
112+
print(files)
113+
assert len(files) == 2
114+
print(tmp_path)
115+
out = tmp_path.joinpath(temp.name)
116+
untar(out.with_suffix(".tar"), out)
117+
assert temp.stat().st_size == base_size
105118
finally:
106119
temp.unlink()
107120

108121

122+
@pytest.mark.e2e
def test_e2e_push_pull_as_artifact(tmp_path, target):
    """Push the README with ``as_artifact=True``, pull it back, and expect two entries."""
    helper = Helper()
    readme = Path(__file__).parent / ".." / "README.md"
    metadata = {
        "name": "mnist",
        "description": "Lorem ipsum",
        "author": "John Doe",
        "accuracy": 0.987,
    }
    helper.push(target, readme, as_artifact=True, **metadata)
    helper.pull(target, tmp_path)
    pulled = list(tmp_path.iterdir())
    assert len(pulled) == 2
136+
137+
109138
@pytest.mark.e2e
110139
def test_e2e_push_pull(tmp_path, target):
111140
omlmd = Helper()
@@ -118,7 +147,7 @@ def test_e2e_push_pull(tmp_path, target):
118147
accuracy=0.987,
119148
)
120149
omlmd.pull(target, tmp_path)
121-
assert len(list(tmp_path.iterdir())) == 3
150+
assert len(list(tmp_path.iterdir())) == 2
122151

123152

124153
@pytest.mark.e2e
@@ -132,7 +161,7 @@ def test_e2e_push_pull_with_filters(tmp_path, target):
132161
author="John Doe",
133162
accuracy=0.987,
134163
)
135-
omlmd.pull(target, tmp_path, media_types=[MIME_APPLICATION_MLMODEL])
164+
omlmd.pull(target, tmp_path, media_types=[MIME_BLOB])
136165
assert len(list(tmp_path.iterdir())) == 1
137166

138167

@@ -155,10 +184,11 @@ def test_e2e_push_pull_column(tmp_path, target):
155184

156185
omlmd.push(target, temp, **md)
157186
omlmd.pull(target, tmp_path)
158-
with open(tmp_path.joinpath(temp.name), "r") as f:
159-
pulled = f.read()
160-
assert pulled == content
161-
pulled_sha = sha256(pulled.encode("utf-8")).hexdigest()
162-
assert pulled_sha == content_sha
187+
out = tmp_path.joinpath(temp.name)
188+
untar(out.with_suffix(".tar"), out)
189+
pulled = out.read_text()
190+
assert pulled == content
191+
pulled_sha = sha256(pulled.encode("utf-8")).hexdigest()
192+
assert pulled_sha == content_sha
163193
finally:
164194
temp.unlink()

tests/test_omlmd.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88

99
def test_dry_run_model_metadata_json_yaml_conversions():
1010
metadata = ModelMetadata(name="Example Model", author="John Doe")
11-
json_str = metadata.to_json()
11+
json_str = json.dumps(metadata.to_dict(), indent=4)
1212
yaml_str = yaml.dump(metadata.to_dict(), default_flow_style=False)
1313

1414
print("JSON representation:\n", json_str)
1515
print("YAML representation:\n", yaml_str)
1616

17-
metadata_from_json = ModelMetadata.from_json(json_str)
18-
metadata_from_yaml = ModelMetadata.from_yaml(yaml_str)
17+
metadata_from_json = ModelMetadata(**json.loads(json_str))
18+
metadata_from_yaml = ModelMetadata(**yaml.safe_load(yaml_str))
1919

2020
print("Metadata from JSON:\n", metadata_from_json)
2121
print("Metadata from YAML:\n", metadata_from_yaml)

0 commit comments

Comments
 (0)