Skip to content

Commit b024b4f

Browse files
committed
Improve the speed of benchmark.status, and change the executors' interface to work better with credentials rather than the previous client secrets
1 parent dd17bf9 commit b024b4f

File tree

9 files changed

+81
-78
lines changed

9 files changed

+81
-78
lines changed

docs/benchmarks/ebm-benchmark.ipynb

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
"metadata": {},
88
"outputs": [],
99
"source": [
10-
"# use exact versions of these in order to preserve RANK ordering better\n",
11-
"requirements = \"numpy==1.26.4 pandas==2.2.2 scikit-learn==1.5.1 xgboost==2.1.0 lightgbm==4.5.0 catboost==1.2.5 aplr==10.6.1\"\n",
10+
"# use exact versions for reproducibility of the RANK ordering\n",
11+
"requirements = \"interpret-core numpy==1.26.4 pandas==2.2.2 scikit-learn==1.5.1 xgboost==2.1.0 lightgbm==4.5.0 catboost==1.2.5 aplr==10.6.1\"\n",
1212
"!pip install -U --quiet {requirements}"
1313
]
1414
},
@@ -461,9 +461,8 @@
461461
" load_dotenv()\n",
462462
" TIMEOUT_SEC = 60 * 60 * 24 * 180 # 180 days\n",
463463
" wheel_filepaths = [\"interpret_core-0.6.3-py3-none-any.whl\", \"powerlift-0.1.11-py3-none-any.whl\"]\n",
464-
" n_containers=198\n",
464+
" n_containers=650\n",
465465
" conn_str = os.getenv(\"DOCKER_DB_URL\")\n",
466-
" azure_client_secret = None # use default credentials instead\n",
467466
" resource_group = os.getenv(\"AZURE_RESOURCE_GROUP\")\n",
468467
"\n",
469468
"from powerlift.bench import retrieve_openml_automl_regression, retrieve_openml_automl_classification, retrieve_openml_cc18, retrieve_catboost_50k, retrieve_pmlb\n",
@@ -486,9 +485,9 @@
486485
"source": [
487486
"cache_dir=\"~/.powerlift\"\n",
488487
"data_retrieval = chain(\n",
488+
" retrieve_openml_cc18(cache_dir=cache_dir),\n",
489489
" retrieve_openml_automl_regression(cache_dir=cache_dir),\n",
490490
" # retrieve_openml_automl_classification(cache_dir=cache_dir),\n",
491-
" retrieve_openml_cc18(cache_dir=cache_dir),\n",
492491
" # retrieve_catboost_50k(cache_dir=cache_dir),\n",
493492
" # retrieve_pmlb(cache_dir=cache_dir),\n",
494493
")\n",
@@ -500,11 +499,11 @@
500499
" benchmark.run(trial_runner, trial_filter, n_replicates=n_replicates, executor=LocalMachine(store, debug_mode=True))\n",
501500
"else:\n",
502501
" executor = AzureContainerInstance(\n",
503-
" store, azure_tenant_id, azure_client_id, azure_client_secret, subscription_id, resource_group, credential,\n",
504-
" image=\"mcr.microsoft.com/devcontainers/python:latest\",\n",
505-
" pip_install= requirements + \" psycopg2-binary\" + \" azure-mgmt-containerinstance azure-identity\", #TODO remove azure-mgmt-containerinstance azure-identity once our powerlift image is updated\n",
502+
" store, azure_tenant_id, subscription_id, azure_client_id, credential,\n",
503+
" resource_group=resource_group,\n",
504+
" pip_install=requirements,\n",
506505
" wheel_filepaths=wheel_filepaths,\n",
507-
" n_running_containers=n_containers, num_cores=4, mem_size_gb=16, delete_group_container_on_complete=True\n",
506+
" n_running_containers=n_containers\n",
508507
" )\n",
509508
" benchmark.run(trial_runner, trial_filter, timeout=TIMEOUT_SEC, n_replicates=n_replicates, executor=executor)"
510509
]

python/powerlift/README.md

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -87,24 +87,21 @@ This can also be run on Azure Container Instances where needed.
8787
```python
8888
# Run experiment (but in ACI).
8989
from powerlift.executors import AzureContainerInstance
90+
store = Store(os.getenv("AZURE_DB_URL"))
9091
azure_tenant_id = os.getenv("AZURE_TENANT_ID")
92+
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
9193
azure_client_id = os.getenv("AZURE_CLIENT_ID")
9294
azure_client_secret = os.getenv("AZURE_CLIENT_SECRET")
93-
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
9495
resource_group = os.getenv("AZURE_RESOURCE_GROUP")
95-
store = Store(os.getenv("AZURE_DB_URL"))
9696

9797
executor = AzureContainerInstance(
9898
store,
9999
azure_tenant_id,
100-
azure_client_id,
101-
azure_client_secret,
102100
subscription_id,
103-
resource_group,
104-
n_running_containers=5,
105-
num_cores=1,
106-
mem_size_gb=2,
107-
raise_exception=True,
101+
azure_client_id,
102+
azure_client_secret=azure_client_secret,
103+
resource_group=resource_group,
104+
n_running_containers=5
108105
)
109106
benchmark = Benchmark(store, name="SVM vs RF")
110107
benchmark.run(trial_runner, trial_filter, timeout=10, executor=executor)

python/powerlift/powerlift/bench/benchmark.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,10 @@ def status(self) -> Optional[pd.DataFrame]:
230230
Returns:
231231
Trial statuses (Optional[pandas.DataFrame]): Experiment's trials' status.
232232
"""
233-
self._store.reset()
234-
while self._store.do:
235-
with self._store:
236-
self._experiment_id = self._store.get_experiment(self._name)
237-
if self._experiment_id is None:
238-
return None
239-
240-
records = list(self._store.iter_status(self._experiment_id))
241-
return pd.DataFrame.from_records(records)
233+
df = self._store.get_status(self._name)
234+
df["meta"] = df["meta"].apply(lambda x: str(x))
235+
df = df.sort_values(by=["task", "method", "meta", "replicate_num"])
236+
return df
242237

243238
def results(self) -> Optional[pd.DataFrame]:
244239
"""Retrieves trial measures of an experiment in long form.

python/powerlift/powerlift/bench/store.py

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -189,11 +189,8 @@ def __enter__(self):
189189
if not self._reset and self._attempts == 0:
190190
raise Exception("Must reset before entering the Store context.")
191191

192-
if 0 < self._attempts:
193-
assert self._session is None
194-
assert self._conn is None
195-
assert self._engine is None
196-
192+
# on first re-attempt, do not sleep
193+
if 2 <= self._attempts:
197194
sleep_time = (
198195
self._wait_secs
199196
* (self._wait_lengthing**self._attempts)
@@ -729,27 +726,41 @@ def iter_experiment_trials(self, experiment_id: int):
729726
trial = self.from_db_trial(trial_orm)
730727
yield trial
731728

732-
def iter_status(self, experiment_id: int) -> Iterable[Mapping[str, object]]:
733-
# TODO(nopdive): Should this be in the store?
734-
self.check_allowed()
735-
trial_orms = self._session.query(db.Trial).filter_by(
736-
experiment_id=experiment_id
729+
def get_status(self, experiment_name: str):
730+
sql = text(
731+
f"""
732+
SELECT
733+
t.id AS trial_id,
734+
ta.name AS task,
735+
m.name AS method,
736+
t.meta AS meta,
737+
t.replicate_num AS replicate_num,
738+
t.status AS status,
739+
t.errmsg AS errmsg,
740+
t.create_time AS create_time,
741+
t.start_time AS start_time,
742+
t.end_time AS end_time,
743+
t.runner_id AS runner_id
744+
FROM
745+
experiment e
746+
JOIN
747+
trial t on e.id = t.experiment_id
748+
JOIN
749+
task ta ON t.task_id = ta.id
750+
JOIN
751+
method m ON t.method_id = m.id
752+
WHERE
753+
e.name = '{experiment_name}'
754+
"""
737755
)
738-
for trial_orm in trial_orms:
739-
record = {
740-
"trial_id": trial_orm.id,
741-
"replicate_num": trial_orm.replicate_num,
742-
"meta": trial_orm.meta,
743-
"method": trial_orm.method.name,
744-
"task": trial_orm.task.name,
745-
"status": trial_orm.status.name,
746-
"errmsg": trial_orm.errmsg,
747-
"create_time": trial_orm.create_time,
748-
"start_time": trial_orm.start_time,
749-
"end_time": trial_orm.end_time,
750-
"runner_id": trial_orm.runner_id,
751-
}
752-
yield record
756+
self.reset()
757+
while self.do:
758+
with self:
759+
result = self._session.execute(sql)
760+
records = result.all()
761+
columns = result.keys()
762+
df = pd.DataFrame.from_records(records, columns=columns)
763+
return df
753764

754765
def get_results(self, experiment_name: str):
755766
sql = text(
@@ -784,7 +795,6 @@ def get_results(self, experiment_name: str):
784795
e.name = '{experiment_name}'
785796
"""
786797
)
787-
788798
self.reset()
789799
while self.do:
790800
with self:
@@ -945,7 +955,6 @@ def _create_task_with_supervised(self, supervised, version):
945955
mimetype=y_mimetype,
946956
embedded=y_bstream.getvalue(),
947957
)
948-
949958
meta_orm = db.Asset(
950959
name=meta_name,
951960
description=f"Metadata for {supervised.name()}",
@@ -977,6 +986,7 @@ def _create_task_with_supervised(self, supervised, version):
977986

978987
self._session.add(X_orm)
979988
self._session.add(y_orm)
989+
self._session.add(meta_orm)
980990
self._session.add(task_orm)
981991
self._session.flush()
982992

@@ -1004,7 +1014,6 @@ def _create_task_with_dataframe(self, data, version):
10041014
mimetype=outputs_mimetype,
10051015
embedded=outputs_bstream.getvalue(),
10061016
)
1007-
10081017
meta_orm = db.Asset(
10091018
name=meta_name,
10101019
description=f"Metadata for {data.name()}",
@@ -1036,6 +1045,7 @@ def _create_task_with_dataframe(self, data, version):
10361045

10371046
self._session.add(inputs_orm)
10381047
self._session.add(outputs_orm)
1048+
self._session.add(meta_orm)
10391049
self._session.add(task_orm)
10401050
self._session.flush()
10411051

@@ -1231,8 +1241,8 @@ def populate_with_datasets(
12311241

12321242
if dataset_iter is None:
12331243
dataset_iter = chain(
1244+
retrieve_openml_cc18(cache_dir=cache_dir),
12341245
retrieve_openml_automl_regression(cache_dir=cache_dir),
1235-
retrieve_openml_automl_classification(cache_dir=cache_dir),
12361246
)
12371247

12381248
for dataset in dataset_iter:

python/powerlift/powerlift/executors/azure_ci.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,21 @@ def __init__(
1919
self,
2020
store: Store,
2121
azure_tenant_id: str,
22-
azure_client_id: str,
23-
azure_client_secret: str,
2422
subscription_id: str,
25-
resource_group: str,
23+
azure_client_id: str,
2624
credential=None,
27-
# other images available at:
28-
# https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
29-
# TODO: change default to mcr.microsoft.com/devcontainers/python:latest
30-
image: str = "interpretml/powerlift:0.1.11",
25+
azure_client_secret: str = None,
26+
resource_group: str = "powerlift_rg",
3127
shell_install: str = None,
3228
pip_install: str = None,
33-
n_running_containers: int = 1,
34-
num_cores: int = 1,
35-
mem_size_gb: int = 2,
3629
wheel_filepaths: List[str] = None,
30+
n_running_containers: int = 1,
31+
num_cores: int = 4,
32+
mem_size_gb: int = 16,
33+
# other images available at:
34+
# https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
35+
# NOTE: the default below is mcr.microsoft.com/devcontainers/python:latest
36+
image: str = "mcr.microsoft.com/devcontainers/python:latest",
3737
docker_db_uri: str = None,
3838
raise_exception: bool = False,
3939
delete_group_container_on_complete: bool = True,
@@ -43,15 +43,18 @@ def __init__(
4343
Args:
4444
store (Store): Store that houses trials.
4545
azure_tenant_id (str): Azure tenant ID.
46+
subscription_id (str): Azure subscription ID.
4647
azure_client_id (str): Azure client ID.
48+
credential (optional): Azure credential. Defaults to None.
4749
azure_client_secret (str, optional): Azure client secret. Defaults to None.
48-
subscription_id (str): Azure subscription ID.
4950
resource_group (str, optional): Azure resource group. Defaults to "powerlift_rg".
50-
image (str, optional): Image to execute. Defaults to "interpretml/powerlift:0.0.1".
51+
shell_install (str, optional): apt-get install parameters. Defaults to None.
52+
pip_install (str, optional): pip install parameters. Defaults to None.
53+
wheel_filepaths (List[str], optional): List of wheel filepaths to install on ACI trial run. Defaults to None.
5154
n_running_containers (int, optional): Max number of containers to run simultaneously. Defaults to 1.
5255
num_cores (int, optional): Number of cores per container. Defaults to 4.
5356
mem_size_gb (int, optional): RAM size in GB per container. Defaults to 16.
54-
wheel_filepaths (List[str], optional): List of wheel filepaths to install on ACI trial run. Defaults to None.
57+
image (str, optional): Image to execute. Defaults to "mcr.microsoft.com/devcontainers/python:latest".
5558
docker_db_uri (str, optional): Database URI for container. Defaults to None.
5659
raise_exception (bool, optional): Raise exception on failure.
5760
delete_group_container_on_complete (bool, optional): Delete group containers after completion. Defaults to True.

python/powerlift/powerlift/executors/docker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ class InsecureDocker(LocalMachine):
4646
def __init__(
4747
self,
4848
store: Store,
49-
image: str = "interpretml/powerlift:0.1.11",
49+
image: str = "mcr.microsoft.com/devcontainers/python:latest",
5050
n_running_containers: int = None,
5151
wheel_filepaths: List[str] = None,
5252
docker_db_uri: str = None,
@@ -56,7 +56,7 @@ def __init__(
5656
5757
Args:
5858
store (Store): Store that houses trials.
59-
image (str, optional): Image to execute in container. Defaults to "interpretml/powerlift:0.0.1".
59+
image (str, optional): Image to execute in container. Defaults to "mcr.microsoft.com/devcontainers/python:latest".
6060
n_running_containers (int, optional): Max number of containers running simultaneously. Defaults to None.
6161
wheel_filepaths (List[str], optional): List of wheel filepaths to install on docker trial run. Defaults to None.
6262
docker_db_uri (str, optional): Database URI for container. Defaults to None.

python/powerlift/powerlift/executors/localmachine.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,14 +55,12 @@ def submit(self, experiment_id, trials: List, timeout=None):
5555
for runner_id in range(n_runners):
5656
if self._pool is None:
5757
try:
58-
debug_fn = trial_run_fn if self._debug_mode else None
5958
res = runner.run_trials(
6059
experiment_id,
6160
runner_id,
6261
self._store.uri,
6362
timeout,
6463
self._raise_exception or self._debug_mode,
65-
debug_fn=debug_fn,
6664
)
6765
self._runner_id_to_result[runner_id] = res
6866
except Exception as e:

python/powerlift/powerlift/run_azure/__main__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,18 @@ def run_azure_process(
3131
cmd="apt-get --yes install $shell_install"
3232
eval $cmd
3333
fi
34+
python -m pip install powerlift psycopg2-binary azure-identity azure-mgmt-containerinstance
3435
pip_install=$(psql "$DB_URL" -c "SELECT pip_install FROM Experiment WHERE id='$EXPERIMENT_ID' LIMIT 1;" -t -A)
3536
if [ -n "$pip_install" ]; then
36-
cmd="python -m pip install $pip_install"
37+
cmd="python -m pip install --force-reinstall $pip_install"
3738
eval $cmd
3839
fi
3940
filenames=$(psql "$DB_URL" -c "SELECT name FROM wheel WHERE experiment_id='$EXPERIMENT_ID';" -t -A)
4041
if [ -n "$filenames" ]; then
4142
echo "$filenames" | while IFS= read -r filename; do
4243
echo "Processing filename: $filename"
4344
psql "$DB_URL" -c "COPY (SELECT embedded FROM wheel WHERE experiment_id='$EXPERIMENT_ID' AND name='$filename') TO STDOUT WITH BINARY;" > "$filename"
44-
cmd="python -m pip install $filename"
45+
cmd="python -m pip install --force-reinstall $filename"
4546
eval $cmd
4647
done
4748
fi

python/powerlift/tests/powerlift/bench/test_experiment.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,10 +145,10 @@ def test_scikit_experiment_aci(populated_azure_store):
145145
executor = AzureContainerInstance(
146146
store,
147147
azure_tenant_id,
148-
azure_client_id,
149-
azure_client_secret,
150148
subscription_id,
151-
resource_group,
149+
azure_client_id,
150+
azure_client_secret=azure_client_secret,
151+
resource_group=resource_group,
152152
n_running_containers=5,
153153
num_cores=2,
154154
mem_size_gb=8,

0 commit comments

Comments
 (0)