Skip to content

Commit

Permalink
Added docstrings for src/MEDS_tabular_automl/utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
aleksiakolo committed Jun 12, 2024
1 parent 8d3c5f9 commit 97b6160
Showing 1 changed file with 42 additions and 3 deletions.
45 changes: 42 additions & 3 deletions src/MEDS_tabular_automl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,15 @@ def hydra_loguru_init() -> None:
logger.add(os.path.join(hydra_path, "main.log"))


def load_tqdm(use_tqdm):
def load_tqdm(use_tqdm: bool):
"""Conditionally loads and returns tqdm progress bar handler or a no-operation function.
Args:
use_tqdm: Flag indicating whether to use tqdm progress bar.
Returns:
A function that either encapsulates tqdm or simply returns the input it is given.
"""
if use_tqdm:
from tqdm import tqdm

Expand All @@ -61,13 +69,36 @@ def noop(x, **kwargs):


def parse_static_feature_column(c: str) -> tuple[str, str, str]:
    """Splits a flat feature column name into its three components.

    The name is split on "/". The last two parts are returned unchanged, and
    everything before them is kept together (slashes included) as the first
    component, so a prefix that itself contains "/" is preserved intact.

    Args:
        c: The flat feature column name; it must contain at least two "/" separators.

    Returns:
        A 3-tuple ``(prefix, second_to_last_part, last_part)``.

    Raises:
        ValueError: If the column name has fewer than three "/"-separated parts.

    Examples:
        >>> parse_static_feature_column("a/b/c/d")
        ('a/b', 'c', 'd')
        >>> parse_static_feature_column("a/b/c")
        ('a', 'b', 'c')
    """
    # Splitting from the right at most twice isolates the last two parts and
    # leaves any additional slashes inside the prefix untouched.
    parts = c.rsplit("/", 2)
    if len(parts) < 3:
        raise ValueError(f"Column {c} is not a valid flat feature column!")
    prefix, second_to_last, last = parts
    return (prefix, second_to_last, last)


def array_to_sparse_matrix(array: np.ndarray, shape: tuple[int, int]):
def array_to_sparse_matrix(array: np.ndarray, shape: tuple[int, int]) -> coo_array:
    """Converts a stacked ``[data, row, col]`` array into a sparse COO matrix.

    Args:
        array: An array whose first dimension has size 3; it is unpacked along
            that dimension into the values, row indices, and column indices.
        shape: The shape of the resulting sparse matrix.

    Returns:
        A ``coo_array`` built from the unpacked values and coordinates.

    Raises:
        AssertionError: If the input array's first dimension is not 3.
    """
    # Raise explicitly instead of using `assert`, which is stripped under
    # `python -O`; AssertionError is kept so existing callers see the same
    # exception type as before.
    if array.shape[0] != 3:
        raise AssertionError(
            f"Expected an array with first dimension 3 (data, row, col); got shape {array.shape}."
        )
    data, row, col = array
    return coo_array((data, (row, col)), shape=shape)
Expand Down Expand Up @@ -112,7 +143,15 @@ def get_min_dtype(array: np.ndarray) -> np.dtype:
return array.dtype


def sparse_matrix_to_array(coo_matrix: coo_array):
def sparse_matrix_to_array(coo_matrix: coo_array) -> tuple[np.ndarray, tuple[int, int]]:
"""Converts a sparse matrix to a numpy array format with shape information.
Args:
coo_matrix: The sparse matrix to convert.
Returns:
A tuple of a numpy array ([data, row, col]) and the shape of the original matrix.
"""
data, row, col = coo_matrix.data, coo_matrix.row, coo_matrix.col
# Remove invalid indices
valid_indices = (data == 0) | np.isnan(data)
Expand Down

0 comments on commit 97b6160

Please sign in to comment.