move trial_fn from trial table to experiment table

paulbkoch · paulbkoch · commit fa9e94e2c71e · 2024-08-31T02:49:23.000-07:00
diff --git a/python/powerlift/powerlift/bench/benchmark.py b/python/powerlift/powerlift/bench/benchmark.py
@@ -15,6 +15,7 @@
 
 import os
 import numpy as np
+import inspect
 
 
 class Benchmark:
@@ -81,6 +82,8 @@ def run(
                 wheel = db.Wheel(name=name, embedded=content)
                 wheels.append(wheel)
 
+        trial_fn = inspect.getsource(trial_run_fn)
+
         self._store.reset()
         while self._store.do:
             with self._store:
@@ -92,6 +95,7 @@ def run(
                         shell_install,
                         pip_install,
                         script_contents,
+                        trial_fn,
                         wheels,
                     )
 
@@ -185,9 +189,7 @@ def run(
         if executor is None:
             executor = LocalMachine(self._store)
         self._executors.add(executor)
-        executor.submit(
-            self._experiment_id, trial_run_fn, pending_trials, timeout=timeout
-        )
+        executor.submit(self._experiment_id, pending_trials, timeout=timeout)
         return executor
 
     def wait_until_complete(self):
diff --git a/python/powerlift/powerlift/bench/experiment.py b/python/powerlift/powerlift/bench/experiment.py
@@ -146,6 +146,7 @@ class Experiment:
     shell_install: str
     pip_install: str
     script: str
+    trial_fn: str
     wheels: List[Wheel]
     trials: List
 
diff --git a/python/powerlift/powerlift/bench/store.py b/python/powerlift/powerlift/bench/store.py
@@ -44,26 +44,9 @@
 import traceback as tb
 
 
-def _parse_function(src):
-    src_ast = ast.parse(src)
-    if isinstance(src_ast, ast.Module) and isinstance(src_ast.body[0], ast.FunctionDef):
-        return src_ast
-    return None
-
-
-def _compile_function(src_ast):
-    func_name = r"wired_function"
-    src_ast.body[0].name = func_name
-    compiled = compile(src_ast, "<string>", "exec")
-    scope = locals()
-    exec(compiled, scope, scope)
-    return locals()[func_name]
-
-
 MIMETYPE_DF = "application/vnd.interpretml/parquet-df"
 MIMETYPE_SERIES = "application/vnd.interpretml/parquet-series"
 MIMETYPE_JSON = "application/json"
-MIMETYPE_FUNC = "application/vnd.interpretml/function-str"
 
 
 class BytesParser:
@@ -84,13 +67,6 @@ def deserialize(cls, mimetype, bytes):
             return pd.read_parquet(bstream)
         elif mimetype == MIMETYPE_SERIES:
             return pd.read_parquet(bstream)["Target"]
-        elif mimetype == MIMETYPE_FUNC:
-            src = bstream.getvalue().decode("utf-8")
-            src_ast = _parse_function(src)
-            if src_ast is None:
-                raise RuntimeError("Serialized code not valid.")
-            compiled_func = _compile_function(src_ast)
-            return compiled_func
         else:
             return None
 
@@ -125,13 +101,6 @@ def serialize(cls, obj):
         elif isinstance(obj, dict):
             bstream.write(json.dumps(obj).encode())
             mimetype = MIMETYPE_JSON
-        elif isinstance(obj, FunctionType):
-            src = inspect.getsource(obj)
-            src_ast = _parse_function(src)
-            if src_ast is None:
-                raise RuntimeError("Serialized code not valid.")
-            bstream.write(src.encode("utf-8"))
-            mimetype = MIMETYPE_FUNC
         else:
             return None, None
 
@@ -449,33 +418,6 @@ def end_trial(self, trial_id, errmsg=None):
                     result = self._session.execute(query, params)
                     rowcount = result.rowcount
 
-    def add_trial_run_fn(self, trial_ids, trial_run_fn):
-        import sys
-
-        mimetype, bstream = BytesParser.serialize(trial_run_fn)
-        trial_run_fn_asset_orm = db.Asset(
-            name="trial_run_fn",
-            description="Serialized trial run function.",
-            version=sys.version,
-            is_embedded=True,
-            embedded=bstream.getvalue(),
-            mimetype=mimetype,
-        )
-
-        self.reset()
-        while self.do:
-            with self:
-                trial_orms = self._session.query(db.Trial).filter(
-                    db.Trial.id.in_(trial_ids)
-                )
-                for trial_orm in trial_orms:
-                    trial_orm.input_assets.append(trial_run_fn_asset_orm)
-
-                if trial_orms.first() is not None:
-                    orms = [trial_run_fn_asset_orm]
-                    self._session.bulk_save_objects(orms, return_defaults=True)
-        return None
-
     def measure_from_db_task(self, task_orm):
         self.check_allowed()
         from powerlift.bench.experiment import Measure
@@ -581,6 +523,7 @@ def from_db_experiment(self, experiment_orm):
             experiment_orm.shell_install,
             experiment_orm.pip_install,
             experiment_orm.script,
+            experiment_orm.trial_fn,
             wheels,
             trials,
         )
@@ -631,6 +574,15 @@ def find_task_by_id(self, _id: int):
             return None
         return self.from_db_task(task_orm)
 
+    def get_trial_fn(self, experiment_id) -> str:
+        """Fetch the trial function source stored on an experiment row.
+
+        Args:
+            experiment_id: Value matched against ``experiment.id``.
+
+        Returns:
+            The ``trial_fn`` column of the matching row, as produced by
+            ``.scalar()`` on the query result.
+        """
+        # Bind the id as a parameter instead of interpolating it into the SQL
+        # text, so the value can never be parsed as part of the statement.
+        query = text("SELECT trial_fn FROM experiment WHERE id = :experiment_id")
+        params = {"experiment_id": experiment_id}
+
+        self.reset()
+        while self.do:
+            with self:
+                trial_fn = self._session.execute(query, params).scalar()
+        return trial_fn
+
     def pick_trial(self, experiment_id, runner_id):
         self.reset()
         while self.do:
@@ -680,6 +632,7 @@ def create_experiment(
         shell_install: str = None,
         pip_install: str = None,
         script: str = None,
+        trial_fn: str = None,
         wheels=None,
     ) -> Tuple[int, bool]:
         """Create experiment keyed by name."""
@@ -692,6 +645,7 @@ def create_experiment(
             shell_install=shell_install,
             pip_install=pip_install,
             script=script,
+            trial_fn=trial_fn,
         )
 
         if wheels is not None:
diff --git a/python/powerlift/powerlift/db/schema.py b/python/powerlift/powerlift/db/schema.py
@@ -91,12 +91,13 @@ class Experiment(Base):
     """The overall experiment, includes access to trials."""
 
     __tablename__ = "experiment"
-    id = Column(Integer, primary_key=True)
-    name = Column(String(NAME_LEN), unique=True)
+    id = Column(Integer, primary_key=True, nullable=False)
+    name = Column(String(NAME_LEN), unique=True, nullable=False)
     description = Column(String(DESCRIPTION_LEN))
     shell_install = Column(Text)
     pip_install = Column(Text)
-    script = Column(Text)
+    script = Column(Text, nullable=False)
+    trial_fn = Column(Text, nullable=False)
 
     # TODO: consider removing the wheel relationship since it means we
     # spend time downloading the wheels each time we query the experiment
diff --git a/python/powerlift/powerlift/executors/azure_ci.py b/python/powerlift/powerlift/executors/azure_ci.py
@@ -91,13 +91,12 @@ def delete_credentials(self):
         """Deletes credentials in object for accessing Azure Resources."""
         del self._azure_json
 
-    def submit(self, experiment_id, trial_run_fn, trials: List, timeout=None):
+    def submit(self, experiment_id, trials: List, timeout=None):
         from powerlift.run_azure import __main__ as remote_process
 
         uri = (
             self._docker_db_uri if self._docker_db_uri is not None else self._store.uri
         )
-        self._store.add_trial_run_fn([x.id for x in trials], trial_run_fn)
 
         n_runners = min(len(trials), self._n_running_containers)
         params = (
diff --git a/python/powerlift/powerlift/executors/docker.py b/python/powerlift/powerlift/executors/docker.py
@@ -71,11 +71,10 @@ def __init__(
             wheel_filepaths=wheel_filepaths,
         )
 
-    def submit(self, experiment_id, trial_run_fn, trials: List, timeout=None):
+    def submit(self, experiment_id, trials: List, timeout=None):
         uri = (
             self._docker_db_uri if self._docker_db_uri is not None else self._store.uri
         )
-        self._store.add_trial_run_fn([x.id for x in trials], trial_run_fn)
 
         n_runners = min(
             len(trials),
diff --git a/python/powerlift/powerlift/executors/localmachine.py b/python/powerlift/powerlift/executors/localmachine.py
@@ -45,10 +45,9 @@ def __del__(self):
         if self._pool is not None:
             self._pool.close()
 
-    def submit(self, experiment_id, trial_run_fn, trials: List, timeout=None):
+    def submit(self, experiment_id, trials: List, timeout=None):
         from powerlift.run import __main__ as runner
 
-        self._store.add_trial_run_fn([x.id for x in trials], trial_run_fn)
         n_runners = min(
             len(trials),
             multiprocessing.cpu_count() if self._n_cpus is None else self._n_cpus,
diff --git a/python/powerlift/powerlift/run/__main__.py b/python/powerlift/powerlift/run/__main__.py
@@ -14,12 +14,7 @@ def run_trials(
     from powerlift.bench.store import Store
     import traceback
     from powerlift.executors.base import timed_run
-    from powerlift.bench.store import MIMETYPE_FUNC, BytesParser
-    from powerlift.bench.experiment import Store
-    import subprocess
-    import tempfile
-    from pathlib import Path
-    import sys
+    import ast
 
     if is_remote:
         print_exceptions = True
@@ -29,6 +24,24 @@ def run_trials(
         max_attempts = 5
 
     store = Store(db_url, print_exceptions=print_exceptions, max_attempts=max_attempts)
+
+    if debug_fn is not None:
+        # A debug function supplied by the caller is used as-is; nothing is
+        # read from the store in that case.
+        trial_run_fn = debug_fn
+    else:
+        # The store returns the trial function as source text: parse it,
+        # check that it is a function definition, then compile it.
+        trial_src = store.get_trial_fn(experiment_id)
+        trial_ast = ast.parse(trial_src)
+        if (
+            not isinstance(trial_ast, ast.Module)
+            or not trial_ast.body  # empty source: body[0] would raise IndexError
+            or not isinstance(trial_ast.body[0], ast.FunctionDef)
+        ):
+            raise RuntimeError("Serialized code not valid.")
+
+        # Rename the function so it can be looked up under a known key,
+        # whatever it was called where it was defined.
+        func_name = r"wired_function"
+        trial_ast.body[0].name = func_name
+        compiled = compile(trial_ast, "<string>", "exec")
+        scope = locals()
+        exec(compiled, scope, scope)
+        # Read the result from the mapping that was handed to exec(). On
+        # Python 3.13+ each locals() call inside a function returns a fresh
+        # snapshot, so a second locals() call would not contain the function.
+        trial_run_fn = scope[func_name]
+
     while True:
         trial_id = store.pick_trial(experiment_id, runner_id)
         if trial_id is None:
@@ -40,21 +53,6 @@ def run_trials(
         if trial is None:
             raise RuntimeError(f"No trial found for id {trial_id}")
 
-        # Handle input assets
-        trial_run_fn = None
-        for input_asset in trial.input_assets:
-            if input_asset.mimetype == MIMETYPE_FUNC:
-                trial_run_fn = BytesParser.deserialize(
-                    MIMETYPE_FUNC, input_asset.embedded
-                )
-            else:
-                continue
-        if debug_fn is not None:
-            trial_run_fn = debug_fn
-
-        if trial_run_fn is None:
-            raise RuntimeError("No trial run function found.")
-
         # Run trial
         errmsg = None
         try:
@@ -72,36 +70,49 @@ def run_trials(
 
 
 if __name__ == "__main__":
-    import os
-    import time
-
-    experiment_id = os.getenv("EXPERIMENT_ID")
-    runner_id = os.getenv("RUNNER_ID")
-    db_url = os.getenv("DB_URL")
-    timeout = float(os.getenv("TIMEOUT", 0.0))
-    raise_exception = True if os.getenv("RAISE_EXCEPTION", False) == "True" else False
-    run_trials(
-        experiment_id, runner_id, db_url, timeout, raise_exception, is_remote=True
-    )
+    print("STARTING RUNNER")
 
-    # below here is Azure specific. Make optional in the future
-
-    from azure.identity import ManagedIdentityCredential
-    from azure.mgmt.containerinstance import ContainerInstanceManagementClient
-
-    subscription_id = os.getenv("SUBSCRIPTION_ID")
-    resource_group_name = os.getenv("RESOURCE_GROUP_NAME")
-    container_group_name = os.getenv("CONTAINER_GROUP_NAME")
-
-    credential = ManagedIdentityCredential()
-    aci_client = ContainerInstanceManagementClient(credential, subscription_id)
-
-    # self-delete the container that we're running on
-    delete_poller = aci_client.container_groups.begin_delete(
-        resource_group_name, container_group_name
-    )
-    while not delete_poller.done():
-        print("Waiting to be deleted..")
-        time.sleep(60)
+    import time
+    import traceback
 
-    print("THIS LINE SHOULD NEVER EXECUTE SINCE THIS CONTAINER SHOULD BE DELETED.")
+    try:
+        import os
+
+        # Runner configuration is read from environment variables.
+        experiment_id = os.getenv("EXPERIMENT_ID")
+        runner_id = os.getenv("RUNNER_ID")
+        db_url = os.getenv("DB_URL")
+        timeout = float(os.getenv("TIMEOUT", 0.0))
+        # Only the exact string "True" turns this flag on.
+        raise_exception = os.getenv("RAISE_EXCEPTION") == "True"
+        run_trials(
+            experiment_id, runner_id, db_url, timeout, raise_exception, is_remote=True
+        )
+
+        # below here is Azure specific. Make optional in the future
+
+        from azure.identity import ManagedIdentityCredential
+        from azure.mgmt.containerinstance import ContainerInstanceManagementClient
+
+        subscription_id = os.getenv("SUBSCRIPTION_ID")
+        resource_group_name = os.getenv("RESOURCE_GROUP_NAME")
+        container_group_name = os.getenv("CONTAINER_GROUP_NAME")
+
+        credential = ManagedIdentityCredential()
+        aci_client = ContainerInstanceManagementClient(credential, subscription_id)
+
+        # self-delete the container that we're running on
+        delete_poller = aci_client.container_groups.begin_delete(
+            resource_group_name, container_group_name
+        )
+        while not delete_poller.done():
+            print("Waiting to be deleted..")
+            time.sleep(60)
+
+        print("THIS LINE SHOULD NEVER EXECUTE SINCE THIS CONTAINER SHOULD BE DELETED.")
+    except Exception as e:
+        # Print the full traceback, then keep the process running and
+        # reporting for 24 hours instead of exiting (presumably so the output
+        # can still be inspected on the container; confirm with the
+        # deployment setup).
+        print("EXCEPTION:")
+        print("".join(traceback.format_exception(type(e), e, e.__traceback__)))
+        for _ in range(60 * 60 * 24):  # wait 24 hours
+            time.sleep(1)
+            print("Unhandled exception.")
diff --git a/python/powerlift/powerlift/run_azure/__main__.py b/python/powerlift/powerlift/run_azure/__main__.py
@@ -47,6 +47,7 @@ def run_azure_process(
         fi
         result=$(psql "$DB_URL" -c "SELECT script FROM Experiment WHERE id='$EXPERIMENT_ID' LIMIT 1;" -t -A)
         printf "%s" "$result" > "startup.py"
+        echo "Running startup.py"
         python startup.py
     """
 

Original file line number	Diff line number	Diff line change
`@@ -71,11 +71,10 @@ def __init__(`
`71`	`71`	`wheel_filepaths=wheel_filepaths,`
`72`	`72`	`)`
`73`	`73`
`74`		`- def submit(self, experiment_id, trial_run_fn, trials: List, timeout=None):`
	`74`	`+ def submit(self, experiment_id, trials: List, timeout=None):`
`75`	`75`	`uri = (`
`76`	`76`	`self._docker_db_uri if self._docker_db_uri is not None else self._store.uri`
`77`	`77`	`)`
`78`		`- self._store.add_trial_run_fn([x.id for x in trials], trial_run_fn)`
`79`	`78`
`80`	`79`	`n_runners = min(`
`81`	`80`	`len(trials),`