mmcdermott · Oufattole · Sep 9, 2024 · Sep 7, 2024 · Sep 8, 2024 · Sep 9, 2024
diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml
@@ -36,7 +36,7 @@ jobs:
     runs-on: ubuntu-latest
     environment:
       name: pypi
-      url: https://pypi.org/p/<package-name> # Replace <package-name> with your PyPI project name
+      url: https://pypi.org/p/meds-tab # PyPI project page for the meds-tab package
     permissions:
       id-token: write # IMPORTANT: mandatory for trusted publishing
 
@@ -91,27 +91,3 @@ jobs:
           gh release upload
           '${{ github.ref_name }}' dist/**
           --repo '${{ github.repository }}'
-
-  publish-to-testpypi:
-    name: Publish Python 🐍 distribution 📦 to TestPyPI
-    needs:
-      - build
-    runs-on: ubuntu-latest
-
-    environment:
-      name: testpypi
-      url: https://test.pypi.org/p/<package-name>
-
-    permissions:
-      id-token: write # IMPORTANT: mandatory for trusted publishing
-
-    steps:
-      - name: Download all the dists
-        uses: actions/download-artifact@v3
-        with:
-          name: python-package-distributions
-          path: dist/
-      - name: Publish distribution 📦 to TestPyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
-        with:
-          repository-url: https://test.pypi.org/legacy/
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,8 @@ dependencies = [
   "scikit-learn", "hydra-optuna-sweeper", "hydra-joblib-launcher", "ml-mixins", "meds==0.3.3", "meds-transforms==0.0.7",
 ]
 
+[tool.setuptools_scm]  # Intentionally empty table. NOTE(review): presumably enables setuptools_scm version inference with default options; confirm setuptools_scm is a build requirement.
+
 [project.scripts]
 meds-tab-describe = "MEDS_tabular_automl.scripts.describe_codes:main"
 meds-tab-tabularize-static = "MEDS_tabular_automl.scripts.tabularize_static:main"

diff --git a/src/MEDS_tabular_automl/configs/describe_codes.yaml b/src/MEDS_tabular_automl/configs/describe_codes.yaml
@@ -2,7 +2,7 @@ defaults:
   - default
   - _self_
 
-input_dir: ${output_cohort_dir}/data
+input_dir: ${MEDS_cohort_dir}/data
 # Where to store output code frequency data
 output_filepath: ${output_cohort_dir}/metadata/codes.parquet
 

diff --git a/src/MEDS_tabular_automl/configs/launch_autogluon.yaml b/src/MEDS_tabular_automl/configs/launch_autogluon.yaml
@@ -3,26 +3,25 @@ defaults:
   - tabularization: default
   - imputer: default
   - normalization: default
+  - model_launcher: autogluon
   - _self_
 
-task_name: task
+task_name: ???  # Mandatory value with no default (`???` marker); must be supplied by the caller.
 
 # Task cached data dir
 input_dir: ${output_cohort_dir}/${task_name}/task_cache
 # Directory with task labels
 input_label_dir: ${output_cohort_dir}/${task_name}/labels/
 # Where to output the model and cached data
-model_dir: ${output_cohort_dir}/autogluon/autogluon_${now:%Y-%m-%d_%H-%M-%S}
-model_log_dir: ${model_dir}/.logs/
-output_filepath: ${model_dir}
-
-# Model parameters
-model_params:
-  iterator:
-    keep_data_in_memory: True
-    binarize_task: True
-
-log_dir: ${model_dir}/.logs/
-log_filepath: ${log_dir}/log.txt
+output_dir: ???  # Mandatory value with no default (`???` marker); must be supplied by the caller.
 
 name: launch_autogluon
+
+hydra:  # Hydra runtime settings: verbosity, job name, and run/sweep working directories.
+  verbose: False
+  job:
+    name: MEDS_TAB_${name}_${worker}_${now:%Y-%m-%d_%H-%M-%S}  # NOTE(review): `worker` is not defined in the visible part of this file; confirm another config provides it.
+  sweep:
+    dir: ${model_log_dir}  # NOTE(review): this change removes the `model_log_dir` key from this file; confirm it still resolves at the config root.
+  run:
+    dir: ${model_log_dir}  # Same directory as sweep.dir above.
diff --git a/src/MEDS_tabular_automl/configs/launch_model.yaml b/src/MEDS_tabular_automl/configs/launch_model.yaml
@@ -2,30 +2,22 @@ defaults:
   - _self_
   - default
   - tabularization: default
-  - model: xgboost # This can be changed to sgd_classifier or any other model
-  - imputer: default
-  - normalization: default
+  - model_launcher: xgboost
   - override hydra/callbacks: evaluation_callback
   - override hydra/sweeper: optuna
   - override hydra/sweeper/sampler: tpe
   - override hydra/launcher: joblib
 
-task_name: task
+task_name: ???  # Mandatory value with no default (`???` marker); must be supplied by the caller.
 
 # Task cached data dir
 input_dir: ${output_cohort_dir}/${task_name}/task_cache
 # Directory with task labels
 input_label_dir: ${output_cohort_dir}/${task_name}/labels/
 # Where to output the model and cached data
-model_saving:
-  model_dir: ${output_cohort_dir}/model/model_${now:%Y-%m-%d_%H-%M-%S}
-  model_file_stem: model
-  model_file_extension: .json
-  delete_below_top_k: -1
-model_logging:
-  model_log_dir: ${model_saving.model_dir}/.logs/
-  performance_log_stem: performance
-  config_log_stem: config
+output_dir: ???  # Mandatory value with no default (`???` marker); must be supplied by the caller.
+
+delete_below_top_k: -1
 
 name: launch_model
 

diff --git a/src/MEDS_tabular_automl/configs/model/knn_classifier.yaml b/src/MEDS_tabular_automl/configs/model/knn_classifier.yaml
diff --git a/src/MEDS_tabular_automl/configs/model/logistic_regression.yaml b/src/MEDS_tabular_automl/configs/model/logistic_regression.yaml
diff --git a/src/MEDS_tabular_automl/configs/model/random_forest_classifier.yaml b/src/MEDS_tabular_automl/configs/model/random_forest_classifier.yaml
diff --git a/src/MEDS_tabular_automl/configs/model/sgd_classifier.yaml b/src/MEDS_tabular_automl/configs/model/sgd_classifier.yaml
diff --git a/src/MEDS_tabular_automl/configs/model/xgboost.yaml b/src/MEDS_tabular_automl/configs/model/xgboost.yaml
diff --git a/src/MEDS_tabular_automl/configs/model_launcher/autogluon.yaml b/src/MEDS_tabular_automl/configs/model_launcher/autogluon.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - default
+  - _self_
diff --git a/src/MEDS_tabular_automl/configs/model_launcher/data_loading_params/default.yaml b/src/MEDS_tabular_automl/configs/model_launcher/data_loading_params/default.yaml
@@ -0,0 +1,2 @@
+keep_data_in_memory: True
+binarize_task: True
diff --git a/src/MEDS_tabular_automl/configs/model_launcher/data_processing_params/default.yaml b/src/MEDS_tabular_automl/configs/model_launcher/data_processing_params/default.yaml
@@ -0,0 +1,3 @@
+defaults:
+  - imputer: default
+  - normalization: default
diff --git a/...bular_automl/configs/imputer/default.yaml → ...ta_processing_params/imputer/default.yaml b/...bular_automl/configs/imputer/default.yaml → ...ta_processing_params/imputer/default.yaml
diff --git a/..._automl/configs/imputer/mean_imputer.yaml → ...ocessing_params/imputer/mean_imputer.yaml b/..._automl/configs/imputer/mean_imputer.yaml → ...ocessing_params/imputer/mean_imputer.yaml
diff --git a/...utoml/configs/imputer/median_imputer.yaml → ...essing_params/imputer/median_imputer.yaml b/...utoml/configs/imputer/median_imputer.yaml → ...essing_params/imputer/median_imputer.yaml
diff --git a/..._automl/configs/imputer/mode_imputer.yaml → ...ocessing_params/imputer/mode_imputer.yaml b/..._automl/configs/imputer/mode_imputer.yaml → ...ocessing_params/imputer/mode_imputer.yaml
diff --git a/...automl/configs/normalization/default.yaml → ...cessing_params/normalization/default.yaml b/...automl/configs/normalization/default.yaml → ...cessing_params/normalization/default.yaml
diff --git a/...configs/normalization/max_abs_scaler.yaml → ..._params/normalization/max_abs_scaler.yaml b/...configs/normalization/max_abs_scaler.yaml → ..._params/normalization/max_abs_scaler.yaml
diff --git a/...onfigs/normalization/standard_scaler.yaml → ...params/normalization/standard_scaler.yaml b/...onfigs/normalization/standard_scaler.yaml → ...params/normalization/standard_scaler.yaml
diff --git a/src/MEDS_tabular_automl/configs/model_launcher/default.yaml b/src/MEDS_tabular_automl/configs/model_launcher/default.yaml
@@ -0,0 +1,7 @@
+defaults:
+  - path: default
+  - data_processing_params: default
+  - data_loading_params: default
+  - _self_
+
+tabularization: ${tabularization}
diff --git a/src/MEDS_tabular_automl/configs/model_launcher/hydra/sweeper/default.yaml b/src/MEDS_tabular_automl/configs/model_launcher/hydra/sweeper/default.yaml
@@ -0,0 +1,5 @@
+hydra:
+  sweeper:
+    direction: maximize
+    n_trials: 250
+    n_jobs: 25