From f0ee7b727ba6538a3480d54b5a47adae57fceff9 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Fri, 8 Nov 2024 17:52:35 +0100 Subject: [PATCH] Fix reporting (#914) * fix extract component * add to par * fix par * add more checks * simplify * use pd function * fix function * fix function * update changelog --- CHANGELOG.md | 4 ++++ src/utils/extract_uns_metadata/script.py | 11 ++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 276eb63a8..3213323e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,10 @@ - Update `dataset_id` for `tenx_visium`, `zenodo_spatial`, `zenodo_spatial_slidetags` datasets and use `mouse_brain_coronal` as a test resource in the `spatially_variable_genes` task (PR #908). +## Bug fixes + +- Fix extracting metadata from anndata files in the `extract_metadata` component (PR #914). + # openproblems v2.0.0 A major update to the OpenProblems framework, switching from a Python-based framework to a Viash + Nextflow-based framework. This update features the same concepts as the previous version, but with a new implementation that is more flexible, scalable, and maintainable. diff --git a/src/utils/extract_uns_metadata/script.py b/src/utils/extract_uns_metadata/script.py index 5d759b60a..ac5e93f13 100644 --- a/src/utils/extract_uns_metadata/script.py +++ b/src/utils/extract_uns_metadata/script.py @@ -11,6 +11,7 @@ 'input': 'resources_test/common/pancreas/dataset.h5ad', 'schema': 'src/datasets/api/file_raw.yaml', 'output': 'output/meta.yaml', + 'uns_length_cutoff': 100 } ## VIASH END @@ -36,13 +37,13 @@ ## Helper functions for extracting the dataset metadata in uns ## #################################################################################################### def is_atomic(obj): - return isinstance(obj, str) or isinstance(obj, int) or isinstance(obj, bool) or isinstance(obj, float) + return pd.api.types.is_scalar(obj) def to_atomic(obj): - if isinstance(obj, np.float64): - return float(obj) - elif isinstance(obj, np.int64): + if isinstance(obj, (np.int32,np.int64)): return int(obj) + elif isinstance(obj, (np.float32,np.float64)): + return float(obj) elif isinstance(obj, np.bool_): return bool(obj) elif isinstance(obj, np.str_): @@ -50,7 +51,7 @@ def to_atomic(obj): return obj def is_list_of_atomics(obj): - if not isinstance(obj, (list,pd.core.series.Series,np.ndarray)): + if not isinstance(obj, (list, pd.core.series.Series, np.ndarray)): return False return all(is_atomic(elem) for elem in obj)