diff --git a/README.md b/README.md index 139b7811..a684ba1e 100755 --- a/README.md +++ b/README.md @@ -77,6 +77,9 @@ Release Notes * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity * **bugfix**: `run_local_variability` for dense expression arrays now runs much faster and more accurate + ### Version 1.3.5rc + * `run_magic_imputation` now has a boolean parameter `sparse` to control output sparsity + ### Version 1.3.4 * avoid devision by zero in `select_branch_cells` for very small datasets * make branch selection robust against NaNs diff --git a/src/palantir/utils.py b/src/palantir/utils.py index 1721375c..ede2a842 100644 --- a/src/palantir/utils.py +++ b/src/palantir/utils.py @@ -588,6 +588,8 @@ def run_magic_imputation( expression_key: str = None, imputation_key: str = "MAGIC_imputed_data", n_jobs: int = -1, + sparse: bool = True, + clip_threshold: float = 1e-2, ) -> Union[pd.DataFrame, None, csr_matrix]: """ Run MAGIC imputation on the data. @@ -611,6 +613,10 @@ def run_magic_imputation( Key to store the imputed data in layers of data if it is a sc.AnnData object. Default is 'MAGIC_imputed_data'. n_jobs : int, optional Number of cores to use for parallel processing. If -1, all available cores are used. Default is -1. + sparse : bool, optional + If True, sets values below `clip_threshold` to 0 to return a sparse matrix. If False, returns a dense matrix. Default is True. + clip_threshold : float, optional + Threshold value for setting values to 0 when returning a sparse matrix. Default is 1e-2. Unused if `sparse` is False. 
Returns ------- @@ -656,20 +662,31 @@ def run_magic_imputation( # Stack the results together if issparse(X): - imputed_data = hstack(res).todense() + imputed_data = hstack(res) else: imputed_data = np.hstack(res) - # Set small values to zero - imputed_data[imputed_data < 1e-2] = 0 + # Set small values to zero if returning sparse matrix + if sparse: + if issparse(X): + imputed_data.data[imputed_data.data < clip_threshold] = 0 + imputed_data.eliminate_zeros() + else: + imputed_data = np.where(imputed_data < clip_threshold, 0, imputed_data) + imputed_data = csr_matrix(imputed_data) + else: + if issparse(X): + imputed_data = imputed_data.todense() # Clean up gc.collect() if isinstance(data, sc.AnnData): - data.layers[imputation_key] = np.asarray(imputed_data) + data.layers[imputation_key] = imputed_data if isinstance(data, pd.DataFrame): + if issparse(imputed_data): + imputed_data = imputed_data.toarray() imputed_data = pd.DataFrame( imputed_data, index=data.index, columns=data.columns ) diff --git a/src/palantir/version.py b/src/palantir/version.py index bc369be7..d91680f7 100644 --- a/src/palantir/version.py +++ b/src/palantir/version.py @@ -1,3 +1,3 @@ -__version__ = "1.3.4" +__version__ = "1.3.5rc" __author__ = "Palantir development team" __author_email__ = "manu.talanki@gmail.com" diff --git a/tests/utils_run_magic_imputation.py b/tests/utils_run_magic_imputation.py index 0f18eb1b..dbbd0c5f 100644 --- a/tests/utils_run_magic_imputation.py +++ b/tests/utils_run_magic_imputation.py @@ -16,6 +16,8 @@ def mock_dm_res(): def test_run_magic_imputation_ndarray(mock_dm_res): data = np.random.rand(50, 20) result = run_magic_imputation(data, dm_res=mock_dm_res) + assert isinstance(result, csr_matrix) + result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False) assert isinstance(result, np.ndarray) @@ -30,6 +32,8 @@ def test_run_magic_imputation_dataframe(mock_dm_res): def test_run_magic_imputation_csr(mock_dm_res): data = csr_matrix(np.random.rand(50, 20)) result 
= run_magic_imputation(data, dm_res=mock_dm_res) + assert isinstance(result, csr_matrix) + result = run_magic_imputation(data, dm_res=mock_dm_res, sparse=False) assert isinstance(result, np.ndarray) @@ -39,7 +43,7 @@ def test_run_magic_imputation_anndata(): data.obsp["DM_Similarity"] = np.random.rand(50, 50) result = run_magic_imputation(data) assert "MAGIC_imputed_data" in data.layers - assert isinstance(result, np.ndarray) + assert isinstance(result, csr_matrix) # Test with AnnData and custom keys diff --git a/tests/util_run_pca.py b/tests/utils_run_pca.py similarity index 100% rename from tests/util_run_pca.py rename to tests/utils_run_pca.py