From bdf6446bb8740520d615e8ef56c93c0a4505627e Mon Sep 17 00:00:00 2001 From: ktpolanski Date: Thu, 9 Nov 2023 10:20:06 +0000 Subject: [PATCH] store identified QC ranges in uns --- sctk/_pipeline.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sctk/_pipeline.py b/sctk/_pipeline.py index 302f2b3..adee15c 100644 --- a/sctk/_pipeline.py +++ b/sctk/_pipeline.py @@ -376,7 +376,7 @@ def fit_gaussian( columns = ["min", "max", "scale", "side", "min_pass_rate"] ) -def cellwise_qc(adata, metrics=None, cell_qc_key="cell_passed_qc", **kwargs): +def cellwise_qc(adata, metrics=None, cell_qc_key="cell_passed_qc", uns_qc_key="scautoqc_ranges", **kwargs): """ Filter cells in an AnnData object based on quality control metrics. The object is modified in-place. @@ -394,6 +394,7 @@ def cellwise_qc(adata, metrics=None, cell_qc_key="cell_passed_qc", **kwargs): uses a set of default metrics. For defaults and an explanation, please refer to the QC workflow demo notebook. cell_qc_key: Obs column in the object to store the per-cell QC calls in. + uns_qc_key: Uns key to store the determined QC ranges used in filtering. **kwargs: Additional keyword arguments to pass to the :py:func:`fit_gaussian` function. @@ -442,6 +443,7 @@ def cellwise_qc(adata, metrics=None, cell_qc_key="cell_passed_qc", **kwargs): n_obs = adata.n_obs + range_df = pd.DataFrame(columns=["low","high"]) pass_filter = {} for m, v in metric_params.items(): min_x, max_x, scale, side, min_pass_rate = v @@ -481,12 +483,16 @@ def cellwise_qc(adata, metrics=None, cell_qc_key="cell_passed_qc", **kwargs): print( f"{m}: [{x_low_str}, {x_high_str}], {pass_filter[m].sum()}/{n_obs} passed" ) + # stash the identified ranges in the df + range_df.loc[m, "low"] = x_low_str + range_df.loc[m, "high"] = x_high_str all_passed = np.ones(n_obs).astype(bool) for m, k_pass in pass_filter.items(): all_passed = all_passed & k_pass print(f"{all_passed.sum()}/{n_obs} pass") adata.obs[cell_qc_key] = all_passed + adata.uns[uns_qc_key] = range_df if adata.obs[cell_qc_key].sum() == 0: print( "No cells passed. Performing simple filtering on counts, genes and mito%"