Skip to content

Commit

Permalink
Added AutoGluon test and CLI support
Browse files: browse the repository at this point in the history
  • Loading branch information
Oufattole committed Aug 21, 2024
1 parent e6cf085 commit 94dfde2
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 227 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ meds-tab-describe = "MEDS_tabular_automl.scripts.describe_codes:main"
meds-tab-tabularize-static = "MEDS_tabular_automl.scripts.tabularize_static:main"
meds-tab-tabularize-time-series = "MEDS_tabular_automl.scripts.tabularize_time_series:main"
meds-tab-cache-task = "MEDS_tabular_automl.scripts.cache_task:main"
meds-tab-xgboost = "MEDS_tabular_automl.scripts.launch_xgboost:main"
meds-tab-xgboost = "MEDS_tabular_automl.scripts.launch_model:main"
meds-tab-model = "MEDS_tabular_automl.scripts.launch_model:main"
meds-tab-autogluon = "MEDS_tabular_automl.scripts.launch_autogluon:main"
generate-subsets = "MEDS_tabular_automl.scripts.generate_subsets:main"


Expand Down
2 changes: 2 additions & 0 deletions src/MEDS_tabular_automl/configs/launch_autogluon.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
defaults:
- default
- tabularization: default
- imputer: default
- normalization: default
- override hydra/sweeper: optuna
- override hydra/sweeper/sampler: tpe
- override hydra/launcher: joblib
Expand Down
33 changes: 0 additions & 33 deletions src/MEDS_tabular_automl/configs/launch_sklearnmodel.yaml

This file was deleted.

58 changes: 0 additions & 58 deletions src/MEDS_tabular_automl/configs/launch_xgboost.yaml

This file was deleted.

58 changes: 0 additions & 58 deletions src/MEDS_tabular_automl/scripts/launch_sklearnmodel.py

This file was deleted.

58 changes: 0 additions & 58 deletions src/MEDS_tabular_automl/scripts/launch_xgboost.py

This file was deleted.

10 changes: 8 additions & 2 deletions src/MEDS_tabular_automl/tabular_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,10 @@ def _get_approximate_correlation_per_feature(self, X: sp.csc_matrix, y: np.ndarr

def _set_imputer(self):
"""Sets the imputer for the data."""
if self.cfg.model_params.iterator.imputer.imputer_target:
if (
hasattr(self.cfg.model_params.iterator, "imputer")
and self.cfg.model_params.iterator.imputer.imputer_target
):
imputer = self.cfg.model_params.iterator.imputer.imputer_target
if hasattr(imputer, "partial_fit"):
for i in range(len(self._data_shards)):
Expand All @@ -240,7 +243,10 @@ def _set_imputer(self):

def _set_scaler(self):
"""Sets the scaler for the data."""
if self.cfg.model_params.iterator.normalization.normalizer:
if (
hasattr(self.cfg.model_params.iterator, "normalization")
and self.cfg.model_params.iterator.normalization.normalizer
):
scaler = self.cfg.model_params.iterator.normalization.normalizer
if hasattr(scaler, "partial_fit"):
for i in range(len(self._data_shards)):
Expand Down
42 changes: 25 additions & 17 deletions tests/test_tabularize.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

root = rootutils.setup_root(__file__, dotenv=True, pythonpath=True, cwd=True)

import importlib.util
import json
import os
import subprocess
import tempfile
from io import StringIO
Expand Down Expand Up @@ -370,25 +372,31 @@ def test_tabularize():
output_files = list(output_dir.glob("**/*.pkl"))
assert len(output_files) == 1

# autogluon_config_kwargs = {
# **shared_config,
# "tabularization.min_code_inclusion_count": 1,
# "tabularization.window_sizes": "[30d,365d,full]",
# "model_params.iterator.keep_data_in_memory": False,
# "model_dir": "${output_cohort_dir}/model_online/model_${now:%Y-%m-%d_%H-%M-%S}",
# }
if importlib.util.find_spec("autogluon") is not None:
import autogluon as ag

# with initialize(
# version_base=None, config_path="../src/MEDS_tabular_automl/configs/"
# ): # path to config.yaml
# overrides = [f"{k}={v}" for k, v in sklearnmodel_config_kwargs.items()]
# cfg = compose(config_name="launch_sklearnmodel", overrides=overrides) # config.yaml
from MEDS_tabular_automl.scripts import launch_autogluon

# output_dir = Path(cfg.output_cohort_dir) / "model_online"
autogluon_config_kwargs = {
**shared_config,
"tabularization.min_code_inclusion_count": 1,
"tabularization.window_sizes": "[30d,365d,full]",
"model_params.iterator.keep_data_in_memory": False,
"model_dir": "${output_cohort_dir}/model_online/model_${now:%Y-%m-%d_%H-%M-%S}",
}

# launch_model.main(cfg)
# output_files = list(output_dir.glob("**/*.pkl"))
# assert len(output_files) == 1
with initialize(
version_base=None, config_path="../src/MEDS_tabular_automl/configs/"
): # path to config.yaml
overrides = [f"{k}={v}" for k, v in autogluon_config_kwargs.items()]
cfg = compose(config_name="launch_autogluon", overrides=overrides) # config.yaml

output_dir = Path(cfg.output_cohort_dir) / "model_online"

launch_autogluon.main(cfg)
output_files = list(output_dir.glob("*"))
most_recent_file = max(output_files, key=os.path.getmtime)
ag.tabular.TabularPredictor.load(most_recent_file)


def run_command(script: str, args: list[str], hydra_kwargs: dict[str, str], test_name: str):
Expand Down Expand Up @@ -421,5 +429,5 @@ def test_xgboost_config():
version_base=None, config_path="../src/MEDS_tabular_automl/configs/"
): # path to config.yaml
overrides = [f"{k}={v}" for k, v in xgboost_config_kwargs.items()]
cfg = compose(config_name="launch_xgboost", overrides=overrides) # config.yaml
cfg = compose(config_name="launch_model", overrides=overrides) # config.yaml
assert cfg.tabularization.window_sizes

0 comments on commit 94dfde2

Please sign in to comment.