@@ -14,12 +14,7 @@ def run_trials(
14
14
from powerlift .bench .store import Store
15
15
import traceback
16
16
from powerlift .executors .base import timed_run
17
- from powerlift .bench .store import MIMETYPE_FUNC , BytesParser
18
- from powerlift .bench .experiment import Store
19
- import subprocess
20
- import tempfile
21
- from pathlib import Path
22
- import sys
17
+ import ast
23
18
24
19
if is_remote :
25
20
print_exceptions = True
@@ -29,6 +24,24 @@ def run_trials(
29
24
max_attempts = 5
30
25
31
26
store = Store (db_url , print_exceptions = print_exceptions , max_attempts = max_attempts )
27
+
28
+ if debug_fn is not None :
29
+ trial_run_fn = debug_fn
30
+ else :
31
+ trial_run_fn = store .get_trial_fn (experiment_id )
32
+ trial_run_fn = ast .parse (trial_run_fn )
33
+ if not isinstance (trial_run_fn , ast .Module ) or not isinstance (
34
+ trial_run_fn .body [0 ], ast .FunctionDef
35
+ ):
36
+ raise RuntimeError ("Serialized code not valid." )
37
+
38
+ func_name = r"wired_function"
39
+ trial_run_fn .body [0 ].name = func_name
40
+ compiled = compile (trial_run_fn , "<string>" , "exec" )
41
+ scope = locals ()
42
+ exec (compiled , scope , scope )
43
+ trial_run_fn = locals ()[func_name ]
44
+
32
45
while True :
33
46
trial_id = store .pick_trial (experiment_id , runner_id )
34
47
if trial_id is None :
@@ -40,21 +53,6 @@ def run_trials(
40
53
if trial is None :
41
54
raise RuntimeError (f"No trial found for id { trial_id } " )
42
55
43
- # Handle input assets
44
- trial_run_fn = None
45
- for input_asset in trial .input_assets :
46
- if input_asset .mimetype == MIMETYPE_FUNC :
47
- trial_run_fn = BytesParser .deserialize (
48
- MIMETYPE_FUNC , input_asset .embedded
49
- )
50
- else :
51
- continue
52
- if debug_fn is not None :
53
- trial_run_fn = debug_fn
54
-
55
- if trial_run_fn is None :
56
- raise RuntimeError ("No trial run function found." )
57
-
58
56
# Run trial
59
57
errmsg = None
60
58
try :
@@ -72,36 +70,49 @@ def run_trials(
72
70
73
71
74
72
if __name__ == "__main__" :
75
- import os
76
- import time
77
-
78
- experiment_id = os .getenv ("EXPERIMENT_ID" )
79
- runner_id = os .getenv ("RUNNER_ID" )
80
- db_url = os .getenv ("DB_URL" )
81
- timeout = float (os .getenv ("TIMEOUT" , 0.0 ))
82
- raise_exception = True if os .getenv ("RAISE_EXCEPTION" , False ) == "True" else False
83
- run_trials (
84
- experiment_id , runner_id , db_url , timeout , raise_exception , is_remote = True
85
- )
73
+ print ("STARTING RUNNER" )
86
74
87
- # below here is Azure specific. Make optional in the future
88
-
89
- from azure .identity import ManagedIdentityCredential
90
- from azure .mgmt .containerinstance import ContainerInstanceManagementClient
91
-
92
- subscription_id = os .getenv ("SUBSCRIPTION_ID" )
93
- resource_group_name = os .getenv ("RESOURCE_GROUP_NAME" )
94
- container_group_name = os .getenv ("CONTAINER_GROUP_NAME" )
95
-
96
- credential = ManagedIdentityCredential ()
97
- aci_client = ContainerInstanceManagementClient (credential , subscription_id )
98
-
99
- # self-delete the container that we're running on
100
- delete_poller = aci_client .container_groups .begin_delete (
101
- resource_group_name , container_group_name
102
- )
103
- while not delete_poller .done ():
104
- print ("Waiting to be deleted.." )
105
- time .sleep (60 )
75
+ import time
76
+ import traceback
106
77
107
- print ("THIS LINE SHOULD NEVER EXECUTE SINCE THIS CONTAINER SHOULD BE DELETED." )
78
+ try :
79
+ import os
80
+
81
+ experiment_id = os .getenv ("EXPERIMENT_ID" )
82
+ runner_id = os .getenv ("RUNNER_ID" )
83
+ db_url = os .getenv ("DB_URL" )
84
+ timeout = float (os .getenv ("TIMEOUT" , 0.0 ))
85
+ raise_exception = (
86
+ True if os .getenv ("RAISE_EXCEPTION" , False ) == "True" else False
87
+ )
88
+ run_trials (
89
+ experiment_id , runner_id , db_url , timeout , raise_exception , is_remote = True
90
+ )
91
+
92
+ # below here is Azure specific. Make optional in the future
93
+
94
+ from azure .identity import ManagedIdentityCredential
95
+ from azure .mgmt .containerinstance import ContainerInstanceManagementClient
96
+
97
+ subscription_id = os .getenv ("SUBSCRIPTION_ID" )
98
+ resource_group_name = os .getenv ("RESOURCE_GROUP_NAME" )
99
+ container_group_name = os .getenv ("CONTAINER_GROUP_NAME" )
100
+
101
+ credential = ManagedIdentityCredential ()
102
+ aci_client = ContainerInstanceManagementClient (credential , subscription_id )
103
+
104
+ # self-delete the container that we're running on
105
+ delete_poller = aci_client .container_groups .begin_delete (
106
+ resource_group_name , container_group_name
107
+ )
108
+ while not delete_poller .done ():
109
+ print ("Waiting to be deleted.." )
110
+ time .sleep (60 )
111
+
112
+ print ("THIS LINE SHOULD NEVER EXECUTE SINCE THIS CONTAINER SHOULD BE DELETED." )
113
+ except Exception as e :
114
+ print ("EXCEPTION:" )
115
+ print ("" .join (traceback .format_exception (type (e ), e , e .__traceback__ )))
116
+ for _ in range (60 * 60 * 24 ): # wait 24 hours
117
+ time .sleep (1 )
118
+ print ("Unandled exception." )
0 commit comments