Skip to content

Commit

Permalink
improve speed of benchmark.status, and change interface of the execu…
Browse files Browse the repository at this point in the history
…tors to work with credentials better vs previous secrets
  • Loading branch information
paulbkoch committed Sep 1, 2024
1 parent dd17bf9 commit b024b4f
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 78 deletions.
17 changes: 8 additions & 9 deletions docs/benchmarks/ebm-benchmark.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"metadata": {},
"outputs": [],
"source": [
"# use exact versions of these in order to preserve RANK ordering better\n",
"requirements = \"numpy==1.26.4 pandas==2.2.2 scikit-learn==1.5.1 xgboost==2.1.0 lightgbm==4.5.0 catboost==1.2.5 aplr==10.6.1\"\n",
"# use exact versions for reproducibility of the RANK ordering\n",
"requirements = \"interpret-core numpy==1.26.4 pandas==2.2.2 scikit-learn==1.5.1 xgboost==2.1.0 lightgbm==4.5.0 catboost==1.2.5 aplr==10.6.1\"\n",
"!pip install -U --quiet {requirements}"
]
},
Expand Down Expand Up @@ -461,9 +461,8 @@
" load_dotenv()\n",
" TIMEOUT_SEC = 60 * 60 * 24 * 180 # 180 days\n",
" wheel_filepaths = [\"interpret_core-0.6.3-py3-none-any.whl\", \"powerlift-0.1.11-py3-none-any.whl\"]\n",
" n_containers=198\n",
" n_containers=650\n",
" conn_str = os.getenv(\"DOCKER_DB_URL\")\n",
" azure_client_secret = None # use default credentials instead\n",
" resource_group = os.getenv(\"AZURE_RESOURCE_GROUP\")\n",
"\n",
"from powerlift.bench import retrieve_openml_automl_regression, retrieve_openml_automl_classification, retrieve_openml_cc18, retrieve_catboost_50k, retrieve_pmlb\n",
Expand All @@ -486,9 +485,9 @@
"source": [
"cache_dir=\"~/.powerlift\"\n",
"data_retrieval = chain(\n",
" retrieve_openml_cc18(cache_dir=cache_dir),\n",
" retrieve_openml_automl_regression(cache_dir=cache_dir),\n",
" # retrieve_openml_automl_classification(cache_dir=cache_dir),\n",
" retrieve_openml_cc18(cache_dir=cache_dir),\n",
" # retrieve_catboost_50k(cache_dir=cache_dir),\n",
" # retrieve_pmlb(cache_dir=cache_dir),\n",
")\n",
Expand All @@ -500,11 +499,11 @@
" benchmark.run(trial_runner, trial_filter, n_replicates=n_replicates, executor=LocalMachine(store, debug_mode=True))\n",
"else:\n",
" executor = AzureContainerInstance(\n",
" store, azure_tenant_id, azure_client_id, azure_client_secret, subscription_id, resource_group, credential,\n",
" image=\"mcr.microsoft.com/devcontainers/python:latest\",\n",
" pip_install= requirements + \" psycopg2-binary\" + \" azure-mgmt-containerinstance azure-identity\", #TODO remove azure-mgmt-containerinstance azure-identity once our powerlift image is updated\n",
" store, azure_tenant_id, subscription_id, azure_client_id, credential,\n",
" resource_group=resource_group,\n",
" pip_install=requirements,\n",
" wheel_filepaths=wheel_filepaths,\n",
" n_running_containers=n_containers, num_cores=4, mem_size_gb=16, delete_group_container_on_complete=True\n",
" n_running_containers=n_containers\n",
" )\n",
" benchmark.run(trial_runner, trial_filter, timeout=TIMEOUT_SEC, n_replicates=n_replicates, executor=executor)"
]
Expand Down
15 changes: 6 additions & 9 deletions python/powerlift/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,24 +87,21 @@ This can also be run on Azure Container Instances where needed.
```python
# Run experiment (but in ACI).
from powerlift.executors import AzureContainerInstance
store = Store(os.getenv("AZURE_DB_URL"))
azure_tenant_id = os.getenv("AZURE_TENANT_ID")
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
azure_client_id = os.getenv("AZURE_CLIENT_ID")
azure_client_secret = os.getenv("AZURE_CLIENT_SECRET")
subscription_id = os.getenv("AZURE_SUBSCRIPTION_ID")
resource_group = os.getenv("AZURE_RESOURCE_GROUP")
store = Store(os.getenv("AZURE_DB_URL"))

executor = AzureContainerInstance(
store,
azure_tenant_id,
azure_client_id,
azure_client_secret,
subscription_id,
resource_group,
n_running_containers=5,
num_cores=1,
mem_size_gb=2,
raise_exception=True,
azure_client_id,
azure_client_secret=azure_client_secret,
resource_group=resource_group,
n_running_containers=5
)
benchmark = Benchmark(store, name="SVM vs RF")
benchmark.run(trial_runner, trial_filter, timeout=10, executor=executor)
Expand Down
13 changes: 4 additions & 9 deletions python/powerlift/powerlift/bench/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,15 +230,10 @@ def status(self) -> Optional[pd.DataFrame]:
Returns:
Trial statuses (Optional[pandas.DataFrame]): Experiment's trials' status.
"""
self._store.reset()
while self._store.do:
with self._store:
self._experiment_id = self._store.get_experiment(self._name)
if self._experiment_id is None:
return None

records = list(self._store.iter_status(self._experiment_id))
return pd.DataFrame.from_records(records)
df = self._store.get_status(self._name)
df["meta"] = df["meta"].apply(lambda x: str(x))
df = df.sort_values(by=["task", "method", "meta", "replicate_num"])
return df

def results(self) -> Optional[pd.DataFrame]:
"""Retrieves trial measures of an experiment in long form.
Expand Down
68 changes: 39 additions & 29 deletions python/powerlift/powerlift/bench/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,8 @@ def __enter__(self):
if not self._reset and self._attempts == 0:
raise Exception("Must reset before entering the Store context.")

if 0 < self._attempts:
assert self._session is None
assert self._conn is None
assert self._engine is None

# on first re-attempt, do not sleep
if 2 <= self._attempts:
sleep_time = (
self._wait_secs
* (self._wait_lengthing**self._attempts)
Expand Down Expand Up @@ -729,27 +726,41 @@ def iter_experiment_trials(self, experiment_id: int):
trial = self.from_db_trial(trial_orm)
yield trial

def iter_status(self, experiment_id: int) -> Iterable[Mapping[str, object]]:
# TODO(nopdive): Should this be in the store?
self.check_allowed()
trial_orms = self._session.query(db.Trial).filter_by(
experiment_id=experiment_id
def get_status(self, experiment_name: str):
sql = text(
f"""
SELECT
t.id AS trial_id,
ta.name AS task,
m.name AS method,
t.meta AS meta,
t.replicate_num AS replicate_num,
t.status AS status,
t.errmsg AS errmsg,
t.create_time AS create_time,
t.start_time AS start_time,
t.end_time AS end_time,
t.runner_id AS runner_id
FROM
experiment e
JOIN
trial t on e.id = t.experiment_id
JOIN
task ta ON t.task_id = ta.id
JOIN
method m ON t.method_id = m.id
WHERE
e.name = '{experiment_name}'
"""
)
for trial_orm in trial_orms:
record = {
"trial_id": trial_orm.id,
"replicate_num": trial_orm.replicate_num,
"meta": trial_orm.meta,
"method": trial_orm.method.name,
"task": trial_orm.task.name,
"status": trial_orm.status.name,
"errmsg": trial_orm.errmsg,
"create_time": trial_orm.create_time,
"start_time": trial_orm.start_time,
"end_time": trial_orm.end_time,
"runner_id": trial_orm.runner_id,
}
yield record
self.reset()
while self.do:
with self:
result = self._session.execute(sql)
records = result.all()
columns = result.keys()
df = pd.DataFrame.from_records(records, columns=columns)
return df

def get_results(self, experiment_name: str):
sql = text(
Expand Down Expand Up @@ -784,7 +795,6 @@ def get_results(self, experiment_name: str):
e.name = '{experiment_name}'
"""
)

self.reset()
while self.do:
with self:
Expand Down Expand Up @@ -945,7 +955,6 @@ def _create_task_with_supervised(self, supervised, version):
mimetype=y_mimetype,
embedded=y_bstream.getvalue(),
)

meta_orm = db.Asset(
name=meta_name,
description=f"Metadata for {supervised.name()}",
Expand Down Expand Up @@ -977,6 +986,7 @@ def _create_task_with_supervised(self, supervised, version):

self._session.add(X_orm)
self._session.add(y_orm)
self._session.add(meta_orm)
self._session.add(task_orm)
self._session.flush()

Expand Down Expand Up @@ -1004,7 +1014,6 @@ def _create_task_with_dataframe(self, data, version):
mimetype=outputs_mimetype,
embedded=outputs_bstream.getvalue(),
)

meta_orm = db.Asset(
name=meta_name,
description=f"Metadata for {data.name()}",
Expand Down Expand Up @@ -1036,6 +1045,7 @@ def _create_task_with_dataframe(self, data, version):

self._session.add(inputs_orm)
self._session.add(outputs_orm)
self._session.add(meta_orm)
self._session.add(task_orm)
self._session.flush()

Expand Down Expand Up @@ -1231,8 +1241,8 @@ def populate_with_datasets(

if dataset_iter is None:
dataset_iter = chain(
retrieve_openml_cc18(cache_dir=cache_dir),
retrieve_openml_automl_regression(cache_dir=cache_dir),
retrieve_openml_automl_classification(cache_dir=cache_dir),
)

for dataset in dataset_iter:
Expand Down
29 changes: 16 additions & 13 deletions python/powerlift/powerlift/executors/azure_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ def __init__(
self,
store: Store,
azure_tenant_id: str,
azure_client_id: str,
azure_client_secret: str,
subscription_id: str,
resource_group: str,
azure_client_id: str,
credential=None,
# other images available at:
# https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
# TODO: change default to mcr.microsoft.com/devcontainers/python:latest
image: str = "interpretml/powerlift:0.1.11",
azure_client_secret: str = None,
resource_group: str = "powerlift_rg",
shell_install: str = None,
pip_install: str = None,
n_running_containers: int = 1,
num_cores: int = 1,
mem_size_gb: int = 2,
wheel_filepaths: List[str] = None,
n_running_containers: int = 1,
num_cores: int = 4,
mem_size_gb: int = 16,
# other images available at:
# https://mcr.microsoft.com/en-us/product/devcontainers/python/tags
# TODO: change default to mcr.microsoft.com/devcontainers/python:latest
image: str = "mcr.microsoft.com/devcontainers/python:latest",
docker_db_uri: str = None,
raise_exception: bool = False,
delete_group_container_on_complete: bool = True,
Expand All @@ -43,15 +43,18 @@ def __init__(
Args:
store (Store): Store that houses trials.
            azure_tenant_id (str): Azure tenant ID.
subscription_id (str): Azure subscription ID.
azure_client_id (str): Azure client ID.
credential: Azure credential
azure_client_secret (str): Azure client secret.
subscription_id (str): Azure subscription ID.
resource_group (str): Azure resource group.
image (str, optional): Image to execute. Defaults to "interpretml/powerlift:0.0.1".
shell_install (str): apt-get install parameters.
pip_install (str): pip install parameters.
wheel_filepaths (List[str], optional): List of wheel filepaths to install on ACI trial run. Defaults to None.
n_running_containers (int, optional): Max number of containers to run simultaneously. Defaults to 1.
num_cores (int, optional): Number of cores per container. Defaults to 1.
mem_size_gb (int, optional): RAM size in GB per container. Defaults to 2.
wheel_filepaths (List[str], optional): List of wheel filepaths to install on ACI trial run. Defaults to None.
image (str, optional): Image to execute. Defaults to "mcr.microsoft.com/devcontainers/python:latest".
docker_db_uri (str, optional): Database URI for container. Defaults to None.
raise_exception (bool, optional): Raise exception on failure.
delete_group_container_on_complete (bool, optional): Delete group containers after completion. Defaults to True.
Expand Down
4 changes: 2 additions & 2 deletions python/powerlift/powerlift/executors/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class InsecureDocker(LocalMachine):
def __init__(
self,
store: Store,
image: str = "interpretml/powerlift:0.1.11",
image: str = "mcr.microsoft.com/devcontainers/python:latest",
n_running_containers: int = None,
wheel_filepaths: List[str] = None,
docker_db_uri: str = None,
Expand All @@ -56,7 +56,7 @@ def __init__(
Args:
store (Store): Store that houses trials.
image (str, optional): Image to execute in container. Defaults to "interpretml/powerlift:0.0.1".
image (str, optional): Image to execute in container. Defaults to "mcr.microsoft.com/devcontainers/python:latest".
n_running_containers (int, optional): Max number of containers running simultaneously. Defaults to None.
wheel_filepaths (List[str], optional): List of wheel filepaths to install on docker trial run. Defaults to None.
docker_db_uri (str, optional): Database URI for container. Defaults to None.
Expand Down
2 changes: 0 additions & 2 deletions python/powerlift/powerlift/executors/localmachine.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,12 @@ def submit(self, experiment_id, trials: List, timeout=None):
for runner_id in range(n_runners):
if self._pool is None:
try:
debug_fn = trial_run_fn if self._debug_mode else None
res = runner.run_trials(
experiment_id,
runner_id,
self._store.uri,
timeout,
self._raise_exception or self._debug_mode,
debug_fn=debug_fn,
)
self._runner_id_to_result[runner_id] = res
except Exception as e:
Expand Down
5 changes: 3 additions & 2 deletions python/powerlift/powerlift/run_azure/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,18 @@ def run_azure_process(
cmd="apt-get --yes install $shell_install"
eval $cmd
fi
python -m pip install powerlift psycopg2-binary azure-identity azure-mgmt-containerinstance
pip_install=$(psql "$DB_URL" -c "SELECT pip_install FROM Experiment WHERE id='$EXPERIMENT_ID' LIMIT 1;" -t -A)
if [ -n "$pip_install" ]; then
cmd="python -m pip install $pip_install"
cmd="python -m pip install --force-reinstall $pip_install"
eval $cmd
fi
filenames=$(psql "$DB_URL" -c "SELECT name FROM wheel WHERE experiment_id='$EXPERIMENT_ID';" -t -A)
if [ -n "$filenames" ]; then
echo "$filenames" | while IFS= read -r filename; do
echo "Processing filename: $filename"
psql "$DB_URL" -c "COPY (SELECT embedded FROM wheel WHERE experiment_id='$EXPERIMENT_ID' AND name='$filename') TO STDOUT WITH BINARY;" > "$filename"
cmd="python -m pip install $filename"
cmd="python -m pip install --force-reinstall $filename"
eval $cmd
done
fi
Expand Down
6 changes: 3 additions & 3 deletions python/powerlift/tests/powerlift/bench/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,10 +145,10 @@ def test_scikit_experiment_aci(populated_azure_store):
executor = AzureContainerInstance(
store,
azure_tenant_id,
azure_client_id,
azure_client_secret,
subscription_id,
resource_group,
azure_client_id,
azure_client_secret=azure_client_secret,
resource_group=resource_group,
n_running_containers=5,
num_cores=2,
mem_size_gb=8,
Expand Down

0 comments on commit b024b4f

Please sign in to comment.