Skip to content

Commit

Permalink
Merge pull request #21 from mmcdermott/scripts/cache_task
Browse files (browse the repository at this point in the history)
Added docstrings for scripts/cache_task.py
  • Loading branch information
aleksiakolo authored Jun 13, 2024
2 parents 4300b25 + 0b34538 commit e95e5a0
Showing 1 changed file with 22 additions and 6 deletions.
28 changes: 22 additions & 6 deletions src/MEDS_tabular_automl/scripts/cache_task.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -37,8 +37,19 @@
]


def generate_row_cached_matrix(matrix, label_df):
"""Generates row-cached matrix for a given matrix and label_df."""
def generate_row_cached_matrix(matrix: sp.coo_array, label_df: pl.LazyFrame) -> sp.coo_array:
"""Generates row-cached matrix for a given matrix and label DataFrame.
Args:
matrix: The input sparse matrix.
label_df: A LazyFrame with an 'event_id' column indicating valid row indices in the matrix.
Returns:
A COOrdinate formatted sparse matrix containing only the rows specified by label_df's event_ids.
Raises:
ValueError: If the maximum event_id in label_df is greater than or equal to the number of rows in the matrix.
"""
label_len = label_df.select(pl.col("event_id").max()).collect().item()
if matrix.shape[0] <= label_len:
raise ValueError(
Expand All @@ -51,10 +62,15 @@ def generate_row_cached_matrix(matrix, label_df):


@hydra.main(version_base=None, config_path=str(config_yaml.parent.resolve()), config_name=config_yaml.stem)
def main(
cfg: DictConfig,
):
"""Performs row splicing of tabularized data for a specific task."""
def main(cfg: DictConfig):
"""Performs row splicing of tabularized data for a specific task based on configuration.
Uses Hydra to manage configurations and logging. The function processes data files based on specified
task configurations, loading matrices, applying transformations, and writing results.
Args:
cfg: The configuration for processing, loaded from a YAML file.
"""
iter_wrapper = load_tqdm(cfg.tqdm)
if not cfg.loguru_init:
hydra_loguru_init()
Expand Down

0 comments on commit e95e5a0

Please sign in to comment.