From 1802a7f1a1abc3385293fd6d3d6537d0521ad14b Mon Sep 17 00:00:00 2001 From: bio-la Date: Thu, 29 Feb 2024 16:23:59 +0100 Subject: [PATCH] fixes to make MultiVI run --- .github/workflows/integration01-ci.yml | 6 ++++-- .../python_scripts/batch_correct_multivi.py | 20 +++++++++++++++---- pyproject.toml | 7 ++++++- tests/integration_1/pipeline.yml | 6 +++--- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 5e42806d..83871bad 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -36,11 +36,13 @@ jobs: channel-priority: strict activate-environment: pipeline_env environment-file: pipeline_env.yaml - +# important: this patch is only to test if multivi integration works +# issues are not related to panpipes https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2 +# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4 - name: Install Panpipes shell: bash -el {0} run: | - pip install -e . 
+ pip install '.[multivipatch]' conda list - name: Conda info diff --git a/panpipes/python_scripts/batch_correct_multivi.py b/panpipes/python_scripts/batch_correct_multivi.py index 6472b118..42d1037c 100644 --- a/panpipes/python_scripts/batch_correct_multivi.py +++ b/panpipes/python_scripts/batch_correct_multivi.py @@ -78,9 +78,12 @@ del mdata if check_for_bool(params["multimodal"]["MultiVI"]["lowmem"]): - L.info("subsetting atac to top 25k HVF") + if 'hvg' in atac.uns.keys(): + L.info("subsetting atac to top HVF") + atac = atac[:, atac.var.highly_variable].copy() + L.info("calculating and subsetting atac to top 25k HVF") sc.pp.highly_variable_genes(atac, n_top_genes=25000) - atac = atac[:, atac.var.highly_variable] + atac = atac[:, atac.var.highly_variable].copy() @@ -125,6 +128,15 @@ L.info("concatenating modalities to comply with multiVI") # adata_paired = ad.concat([rna, atac], join="outer") # adata_paired.var = pd.concat([rna.var,atac.var]) +if rna.is_view: + L.info("RNA is view") + rna = rna.copy() # copy RNA itself; atac must not be overwritten +if atac.is_view: + L.info("ATAC is view") + atac = atac.copy() + +L.info(atac.is_view) + adata_paired = ad.concat([rna.T, atac.T]).T @@ -222,14 +234,14 @@ multivi_training_args={} else: multivi_training_args = {k: v for k, v in params["multimodal"]['MultiVI']['training_args'].items() if v is not None} - +L.info("multivi training args") print(multivi_training_args) if params["multimodal"]['MultiVI']['training_plan'] is None: multivi_training_plan = {} else: multivi_training_plan = {k: v for k, v in params["multimodal"]['MultiVI']['training_plan'].items() if v is not None} - +L.info("multivi training plan") print(multivi_training_plan) mvi.view_anndata_setup() diff --git a/pyproject.toml b/pyproject.toml index 747581c8..002cb865 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "scikit-misc", "scirpy", "scrublet", - "scvi-tools>=1.1.1", + "scvi-tools", #>=1.1.1 "sqlalchemy", ] @@ -61,6 +61,11 @@ spatial = [ "cell2location", 
"tangram-sc" ] + +multivipatch = [ + "scvi-tools<=0.20.3", + "requests" +] [project.scripts] panpipes = "panpipes:entry.main" diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml index 7111b991..eaaffce0 100644 --- a/tests/integration_1/pipeline.yml +++ b/tests/integration_1/pipeline.yml @@ -211,7 +211,7 @@ multimodal: # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, # integration_by_tissue ... column_continuous: - column_categorical: sample_id + column_categorical: dataset # extra params: totalvi: # this is a minimal set of parameters that will be expected @@ -269,8 +269,8 @@ multimodal: batch_size : 128 #float (default: 0.001) weight_decay : 0.001 - #float (default: 1e-08) - eps : 1e-08 + #float (default: 1.0e-08) + eps : 1.0e-08 #bool (default: True) early_stopping : True #bool (default: True)