From 01f94de3cc5432b77f0c3896134cd74101acb5fb Mon Sep 17 00:00:00 2001 From: bio-la Date: Wed, 28 Feb 2024 10:48:16 +0100 Subject: [PATCH 01/15] created pipeline for test --- tests/integration_1/pipeline.yml | 390 +++++++++++++++++++++++++++++++ 1 file changed, 390 insertions(+) create mode 100644 tests/integration_1/pipeline.yml diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml new file mode 100644 index 00000000..0f5c7407 --- /dev/null +++ b/tests/integration_1/pipeline.yml @@ -0,0 +1,390 @@ +# Pipeline pipeline_integration.py configuration file +# ============================================== + +# compute resource options +# ------------------------ +resources: + # Number of threads used for parallel jobs + # this must be enough memory to load your mudata and do computationally intensive tasks + threads_high: 1 + # this must be enough memory to load your mudata and do computationally light tasks + threads_medium: 1 + # this must be enough memory to load text files and do plotting, requires much less memory than the other two + threads_low: 1 + # if you access to a gpu-specific queue, how many gpu threads to request, make sure to edit the queues section below, + # so that panpipes can find your gpu queue + threads_gpu: 2 +# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment +condaenv: /Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env + +# allows for tweaking which queues jobs get submitted to, +# in case there is a special queue for long jobs or you have access to a gpu-specific queue +# the default queue should be specified in your .cgat.yml file +# leave blank if you do not want to use the alternative queues +queues: + long: + gpu: + +# Start +# -------------------------- +# either one that exists already with +sample_prefix: teaseq +#this is what comes out of the filtering/preprocessing +preprocessed_obj: teaseq.h5mu +# contains layers: raw_counts, 
logged_counts, and has scaled or logged counts in X + + +#-------------------------- +# Batch correction +# ------------------------- +# unimodal: correct each modality independently +rna: + # True or false depending on whether you want to run batch correction + run: True + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no spaces) + tools: harmony,scvi,bbknn + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneosly. + # Specifically all columns specified will be merged into one 'batch' columns. + # if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... + column: dataset + #----------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #----------------------------- + # SCVI args + #----------------------------- + scvi: + exclude_mt_genes: True + mt_column: mt + model_args: + n_layers: + n_latent: + gene_likelihood: zinb + training_args: + max_epochs: 400 + train_size: 0.9 + early_stopping: True + training_plan: + lr: 0.001 + n_epochs_kl_warmup: 400 + reduce_lr_on_plateau: True + lr_scheduler_metric: + lr_patience: 8 + lr_factor: 0.1 + #---------------------------- + # find neighbour parameters + #----------------------------- + # to reuse these params, (for example for WNN) please use anchors (&) and scalars (*) in the relevant place + # i.e. 
&rna_neighbors will be called by *rna_neighbors where referenced + neighbors: &rna_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + +#-------------------------- +prot: + # True or false depending on whether you want to run batch correction + run: True + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,combat + tools: harmony + # this is the column you want to batch correct on. if you specify a comma separated list (no spaces), + # they will be all used simultaneosly. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: orig.ident + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #----------------------------› + # find neighbour parameters + #----------------------------- + neighbors: &prot_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#-------------------------- +atac: + # True or false depending on whether you want to run batch correction + run: True + # which dimensionality reduction to expect, LSI or PCA + dimred: LSI + # what method(s) to use to run batch correction, you can specify multiple + # (comma-seprated string, no spaces) + # choices: harmony,bbknn,combat + tools: harmony,bbknn + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneosly. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: dataset + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: + #---------------------------- + # find neighbour parameters + #----------------------------- + neighbors: &atac_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#---------------------------------------------- +# multimodal integration +# remember to specify knn graph params in the section "neighbors" +#---------------------------------------------- +multimodal: + # True or false depending on whether you want to run batch correction + run: True + # what method(s) to use to run batch correction, you can specify multiple + # choices: totalvi, mofa, MultiVI, WNN + # list e.g. below + tools: + - WNN + - totalvi + + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneosly. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column_categorical: sample_id + # extra params: + totalvi: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + + # totalvi will run on rna and prot + modalities: rna,prot + exclude_mt_genes: True + mt_column: mt + # to filter outliers manually create a column called adt_outliers in mdata['prot'].obs + filter_by_hvg: True + filter_prot_outliers: False + model_args: + latent_distribution: "normal" + training_args: + max_epochs: 100 + train_size: 0.9 + early_stopping: True + training_plan: None + MultiVI: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # leave arguments blank for default + lowmem: True + # Set lowmem to True will subset the atac to the top 25k HVF. + # This is to deal with concatenation of atac,rna on large datasets which at the moment is suboptimally required by scvitools. 
+ # >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac) + model_args: + # (default: None) + n_hidden : + # (default: None) + n_latent : + #(bool,default: True) + region_factors : True + #{‘normal’, ‘ln’} (default: 'normal') + latent_distribution : 'normal' + #(bool,default: False) + deeply_inject_covariates : False + #(bool, default: False) + fully_paired : False + training_args: + #(default: 500) + max_epochs : 500 + #float (default: 0.0001) + lr : 0.0001 + #leave blanck for default str | int | bool | None (default: None) + use_gpu : + # float (default: 0.9) + train_size : 0.9 + # leave blanck for default, float | None (default: None) + validation_size : + # int (default: 128) + batch_size : 128 + #float (default: 0.001) + weight_decay : 0.001 + #float (default: 1e-08) + eps : 1e-08 + #bool (default: True) + early_stopping : True + #bool (default: True) + save_best : True + #leave blanck for default int | None (default: None) + check_val_every_n_epoch : + #leave blanck for default int | None (default: None) + n_steps_kl_warmup : + # int | None (default: 50) + n_epochs_kl_warmup : 50 + #bool (default: True) + adversarial_mixing : True + #leave blanck for default dict | None (default: None) + training_plan : + mofa: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # (comma-separated string, no spaces) + modalities: + filter_by_hvg: True + n_factors: 10 + n_iterations: 1000 + #pick one among fast, medium, slow + convergence_mode: fast + save_parameters: False + #if save_parameters True, set the following, otherwise leave blank + outfile: + WNN: + # muon implementation of WNN + modalities: rna,prot,atac + # run wnn on batch corrected unimodal data, set each of the modalities you want to use to calc WNN to ONE method. 
+ # leave to None and it will default to de novo calculation of neighbours on non corrected data for that modality using specified params + batch_corrected: + # options are: "bbknn", "scVI", "harmony", "scanorama" + rna: None + # options are "harmony", "bbknn" + prot: None + # options are "harmony" + atac: None + # please use anchors (&) and scalars (*) in the relevant place + # i.e. &rna_neighbors will be called by *rna_neighbors where referenced + knn: + rna: *rna_neighbors + prot: *prot_neighbors + atac: *atac_neighbors + #WNN has its own neighbors search, specify here + n_neighbors: #leave blank and it will default to aritmetic mean across modalities neighbors + n_bandwidth_neighbors: 20 + n_multineighbors: 200 + metric: 'euclidean' + low_memory: True + + ### + # neighbours knn calculation for multimodal analysis. + ### + neighbors: + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) 
+ # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + + + +#----------------------------- +# Plot +#----------------------------- +plotqc: + # grouping var must be a categorical varible, + # (comma-seprated strings, no spaces) + # umaps comparing the integration (one plot per value in the group) + # for each batch correction column plus any extras in grouping var + grouping_var: dataset,sample_id + # what other metrics do you want to plot on each modalities embedding, (one plot per group) + # use mod:variable notation, + # any metrics that you want to plot on all modality umaps go under "all" + # these can be categorical or numeric + all: rep:receptor_subtype + rna: rna:total_counts + prot: prot:total_counts + atac: + multimodal: rna:total_counts + # if you want to add any additional plots, just remove the log file logs/plot_batch_corrected_umaps.log + # and run panpipes integration make plot_umaps + +# ---------------- +# Make final object +# ---------------- +# Final choices: Leave blank until you have reviewed the results from running +# panpipes integration make full +# This step will produce a mudata object with one layer per modality with +# one correction per modality and one multimodal layer. 
+# Choose the integration results you want to merge in the final object +# For unimodal integration: to pick the uncorrected version use "no_correction" +# then run +# panpipes integration make merge_integration +final_obj: + rna: + include: True + bc_choice: no_correction + prot: + include: True + bc_choice: harmony + atac: + include: False + bc_choice: bbknn + multimodal: + include: True + bc_choice: WNN + From edc853caf29f6c372e70f1f5dd14ca0b49f8a515 Mon Sep 17 00:00:00 2001 From: bio-la Date: Wed, 28 Feb 2024 10:50:47 +0100 Subject: [PATCH 02/15] multiVI, atac test --- .github/workflows/integration01-ci.yml | 98 ++++++++++++++++++++++++++ tests/integration_1/pipeline.yml | 9 ++- 2 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/integration01-ci.yml diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml new file mode 100644 index 00000000..5f3f8c57 --- /dev/null +++ b/.github/workflows/integration01-ci.yml @@ -0,0 +1,98 @@ +name: Run tutorials (integration) + +on: + push: + branches: + - main + pull_request: + branches: + - main + +env: + debug: 'true' + +jobs: + integration: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" + python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: File tree + if: env.debug == 'true' + run: tree + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + auto-activate-base: true + auto-update-conda: true + channels: conda-forge + channel-priority: strict + activate-environment: pipeline_env + environment-file: pipeline_env.yaml + + - name: Install Panpipes + shell: bash -el {0} + run: | + pip install -e . 
+ conda list + + - name: Conda info + if: env.debug == 'true' + shell: bash -el {0} + run: conda info + + - name: Conda list + if: env.debug == 'true' + shell: pwsh + run: conda list + + # Note: all three files are renamed during the download to trim the "subsample_" prefix + - name: Preparing the data + run: | + mkdir -p teaseq/integration && cd teaseq/integration + curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44471927 + + # Note: we run the following to test that the commands works + # However, the following task will replacing the file anyway + - name: Preparing the configuration file + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration config + + - name: Edit the submission file + run: | + cd teaseq/integration + curl -o pipeline.yml https://github.com/DendrouLab/panpipes/blob/fc_cipy/tests/integration_1/pipeline.yml + + - name: Replace template contents in configuration file + run: | + cd teaseq/integration + sed -i 's+/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env+pipeline_env+g' pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree teaseq + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration make full --local + + - name: File tree + if: env.debug == 'true' + run: tree teaseq diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml index 0f5c7407..e84655fb 100644 --- a/tests/integration_1/pipeline.yml +++ b/tests/integration_1/pipeline.yml @@ -40,7 +40,7 @@ preprocessed_obj: teaseq.h5mu # unimodal: correct each modality independently rna: # True or false depending on whether you want to run batch correction - run: True + run: False # what method(s) to use to run batch correction, you can specify multiple # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no 
spaces) tools: harmony,scvi,bbknn @@ -109,7 +109,7 @@ rna: #-------------------------- prot: # True or false depending on whether you want to run batch correction - run: True + run: False # what method(s) to use to run batch correction, you can specify multiple # choices: harmony,bbknn,combat tools: harmony @@ -177,7 +177,7 @@ atac: # BBKNN args # https://bbknn.readthedocs.io/en/latest/ #----------------------------- bbknn: - neighbors_within_batch: + neighbors_within_batch: 30 #---------------------------- # find neighbour parameters #----------------------------- @@ -204,8 +204,7 @@ multimodal: # choices: totalvi, mofa, MultiVI, WNN # list e.g. below tools: - - WNN - - totalvi + - MultiVI # this is the column you want to batch correct on. if you specify a comma separated list, # they will be all used simultaneosly. if you want to test correction for one at a time, From 8ce5bee80e38f2972041eca8a70d0716635068dc Mon Sep 17 00:00:00 2001 From: bio-la Date: Wed, 28 Feb 2024 10:52:03 +0100 Subject: [PATCH 03/15] change title --- .github/workflows/integration01-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 5f3f8c57..73e6c0f8 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -1,4 +1,4 @@ -name: Run tutorials (integration) +name: Run integration01 on: push: From 2508fdf527a61638a835681e3168220982959b18 Mon Sep 17 00:00:00 2001 From: bio-la Date: Wed, 28 Feb 2024 14:54:03 +0100 Subject: [PATCH 04/15] fixed path to pipeline.yml --- .github/workflows/integration01-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 73e6c0f8..8836ca57 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -70,7 +70,7 @@ jobs: - name: Edit the submission file run: | cd 
teaseq/integration - curl -o pipeline.yml https://github.com/DendrouLab/panpipes/blob/fc_cipy/tests/integration_1/pipeline.yml + curl -O pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml - name: Replace template contents in configuration file run: | From 353f7f2710ed7b3a649f339fe5ff60618c2db756 Mon Sep 17 00:00:00 2001 From: bio-la Date: Wed, 28 Feb 2024 15:03:24 +0100 Subject: [PATCH 05/15] typo --- .github/workflows/integration01-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 8836ca57..5e42806d 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -70,7 +70,7 @@ jobs: - name: Edit the submission file run: | cd teaseq/integration - curl -O pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml - name: Replace template contents in configuration file run: | From 71dde70c964f8d4062b7701149702b4f6ea5b98d Mon Sep 17 00:00:00 2001 From: bio-la Date: Thu, 29 Feb 2024 10:08:01 +0100 Subject: [PATCH 06/15] missing arg --- tests/integration_1/pipeline.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml index e84655fb..7111b991 100644 --- a/tests/integration_1/pipeline.yml +++ b/tests/integration_1/pipeline.yml @@ -210,6 +210,7 @@ multimodal: # they will be all used simultaneosly. if you want to test correction for one at a time, # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, # integration_by_tissue ... 
+ column_continuous: column_categorical: sample_id # extra params: totalvi: From f230085e2c4299fa55ab59b2190b6eeafa36bfa3 Mon Sep 17 00:00:00 2001 From: bio-la Date: Thu, 29 Feb 2024 13:20:48 +0100 Subject: [PATCH 07/15] test .copy() --- panpipes/python_scripts/batch_correct_multivi.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/panpipes/python_scripts/batch_correct_multivi.py b/panpipes/python_scripts/batch_correct_multivi.py index ca77e646..6472b118 100644 --- a/panpipes/python_scripts/batch_correct_multivi.py +++ b/panpipes/python_scripts/batch_correct_multivi.py @@ -71,12 +71,11 @@ # ------------------------------------------------------------------ L.info("Running multivi") -# mdata = mu.read(args.scaled_anndata) -# rna = mdata['rna'].copy() -# atac = mdata['atac'].copy() -rna= mu.read(args.scaled_anndata +"/" + "rna") -atac= mu.read(args.scaled_anndata +"/" + "atac") +mdata = mu.read(args.scaled_anndata) +rna = mdata['rna'].copy() +atac = mdata['atac'].copy() +del mdata if check_for_bool(params["multimodal"]["MultiVI"]["lowmem"]): L.info("subsetting atac to top 25k HVF") From 1802a7f1a1abc3385293fd6d3d6537d0521ad14b Mon Sep 17 00:00:00 2001 From: bio-la Date: Thu, 29 Feb 2024 16:23:59 +0100 Subject: [PATCH 08/15] fixes to make MultiVI run --- .github/workflows/integration01-ci.yml | 6 ++++-- .../python_scripts/batch_correct_multivi.py | 20 +++++++++++++++---- pyproject.toml | 7 ++++++- tests/integration_1/pipeline.yml | 6 +++--- 4 files changed, 29 insertions(+), 10 deletions(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 5e42806d..83871bad 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -36,11 +36,13 @@ jobs: channel-priority: strict activate-environment: pipeline_env environment-file: pipeline_env.yaml - +# important: this patch is only to test if multivi integration works +# issues are not related to panpipes 
https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2 +# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4 - name: Install Panpipes shell: bash -el {0} run: | - pip install -e . + pip install '.[multivipatch]' conda list - name: Conda info diff --git a/panpipes/python_scripts/batch_correct_multivi.py b/panpipes/python_scripts/batch_correct_multivi.py index 6472b118..42d1037c 100644 --- a/panpipes/python_scripts/batch_correct_multivi.py +++ b/panpipes/python_scripts/batch_correct_multivi.py @@ -78,9 +78,12 @@ del mdata if check_for_bool(params["multimodal"]["MultiVI"]["lowmem"]): - L.info("subsetting atac to top 25k HVF") + if 'hvg' in atac.uns.keys(): + L.info("subsetting atac to top HVF") + atac = atac[:, atac.var.highly_variable].copy() + L.info("calculating and subsetting atac to top 25k HVF") sc.pp.highly_variable_genes(atac, n_top_genes=25000) - atac = atac[:, atac.var.highly_variable] + atac = atac[:, atac.var.highly_variable].copy() @@ -125,6 +128,15 @@ L.info("concatenating modalities to comply with multiVI") # adata_paired = ad.concat([rna, atac], join="outer") # adata_paired.var = pd.concat([rna.var,atac.var]) +if rna.is_view: + L.info("RNA is view") + rna = rna.copy() +if atac.is_view: + L.info("ATAC is view") + atac = atac.copy() + +L.info(atac.is_view) + adata_paired = ad.concat([rna.T, atac.T]).T @@ -222,14 +234,14 @@ multivi_training_args={} else: multivi_training_args = {k: v for k, v in params["multimodal"]['MultiVI']['training_args'].items() if v is not None} - +L.info("multivi training args") print(multivi_training_args) if params["multimodal"]['MultiVI']['training_plan'] is None: multivi_training_plan = {} else: multivi_training_plan = {k: v for k, v in params["multimodal"]['MultiVI']['training_plan'].items() if v is not None} - +L.info("multivi training plan") print(multivi_training_plan) mvi.view_anndata_setup() diff --git a/pyproject.toml b/pyproject.toml index 747581c8..002cb865 100644 
--- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "scikit-misc", "scirpy", "scrublet", - "scvi-tools>=1.1.1", + "scvi-tools", #>=1.1.1 "sqlalchemy", ] @@ -61,6 +61,11 @@ spatial = [ "cell2location", "tangram-sc" ] + +multivipatch = [ + "scvi-tools<=0.20.3", + "requests" +] [project.scripts] panpipes = "panpipes:entry.main" diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml index 7111b991..eaaffce0 100644 --- a/tests/integration_1/pipeline.yml +++ b/tests/integration_1/pipeline.yml @@ -211,7 +211,7 @@ multimodal: # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, # integration_by_tissue ... column_continuous: - column_categorical: sample_id + column_categorical: dataset # extra params: totalvi: # this is a minimal set of parameters that will be expected @@ -269,8 +269,8 @@ multimodal: batch_size : 128 #float (default: 0.001) weight_decay : 0.001 - #float (default: 1e-08) - eps : 1e-08 + #float (default: 1.0e-08) + eps : 1.0e-08 #bool (default: True) early_stopping : True #bool (default: True) From 421999ac3e29790f31c34d9efe8227297974b03d Mon Sep 17 00:00:00 2001 From: bio-la Date: Fri, 1 Mar 2024 17:13:01 +0100 Subject: [PATCH 09/15] action on smaller multivi --- .github/multiviintegration02-ci.yml | 100 +++++++++++++++++++++++++ .github/workflows/integration01-ci.yml | 4 +- 2 files changed, 102 insertions(+), 2 deletions(-) create mode 100644 .github/multiviintegration02-ci.yml diff --git a/.github/multiviintegration02-ci.yml b/.github/multiviintegration02-ci.yml new file mode 100644 index 00000000..0645ad6c --- /dev/null +++ b/.github/multiviintegration02-ci.yml @@ -0,0 +1,100 @@ +name: testing environment + +on: + push: + branches: + - main + pull_request: + branches: + - main + +env: + debug: 'true' + +jobs: + integration: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" + 
python-version: ["3.10"] + + steps: + - uses: actions/checkout@v4 + + - name: File tree + if: env.debug == 'true' + run: tree + + - uses: conda-incubator/setup-miniconda@v3 + with: + miniforge-version: latest + auto-activate-base: true + auto-update-conda: true + channels: conda-forge + channel-priority: strict + activate-environment: pipeline_env + environment-file: pipeline_env.yaml +# important: this patch is only to test if multivi integration works +# issues are not related to panpipes https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2 +# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4 + - name: Install Panpipes + shell: bash -el {0} + run: | + pip install . + conda list + + - name: Conda info + if: env.debug == 'true' + shell: bash -el {0} + run: conda info + + - name: Conda list + if: env.debug == 'true' + shell: pwsh + run: conda list + + # Note: all three files are renamed during the download to trim the "subsample_" prefix + - name: Preparing the data + run: | + mkdir -p teaseq/integration && cd teaseq/integration + curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44796985 + + # Note: we run the following to test that the commands works + # However, the following task will replacing the file anyway + - name: Preparing the configuration file + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration config + + - name: Edit the submission file + run: | + cd teaseq/integration + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml + + - name: Replace template contents in configuration file + run: | + cd teaseq/integration + sed -i 's+/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env+pipeline_env+g' pipeline.yml + + - name: File tree + if: env.debug == 'true' + run: tree teaseq + + - name: Review pipeline tasks + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration 
show full --local + + - name: Run pipeline tasks + shell: bash -el {0} + run: | + cd teaseq/integration + panpipes integration make full --local + + - name: File tree + if: env.debug == 'true' + run: tree teaseq diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 83871bad..2baaf3dc 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -58,8 +58,8 @@ jobs: # Note: all three files are renamed during the download to trim the "subsample_" prefix - name: Preparing the data run: | - mkdir -p teaseq/integration && cd teaseq/integration - curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44471927 + mkdir -p teaseq/integration && cd teaseq/integration + curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44796985 # Note: we run the following to test that the commands works # However, the following task will replacing the file anyway From bed37b1954d1896d1623abff0d8a8ad8ccdc57c0 Mon Sep 17 00:00:00 2001 From: bio-la Date: Fri, 1 Mar 2024 17:14:26 +0100 Subject: [PATCH 10/15] fixed file --- .github/{ => workflows}/multiviintegration02-ci.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{ => workflows}/multiviintegration02-ci.yml (100%) diff --git a/.github/multiviintegration02-ci.yml b/.github/workflows/multiviintegration02-ci.yml similarity index 100% rename from .github/multiviintegration02-ci.yml rename to .github/workflows/multiviintegration02-ci.yml From 9015f6473d3dd3d6efe67ad918af9356960ed9de Mon Sep 17 00:00:00 2001 From: bio-la Date: Fri, 1 Mar 2024 18:00:22 +0100 Subject: [PATCH 11/15] test seed --- panpipes/python_scripts/batch_correct_multivi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/panpipes/python_scripts/batch_correct_multivi.py b/panpipes/python_scripts/batch_correct_multivi.py index 42d1037c..533369a9 100644 --- a/panpipes/python_scripts/batch_correct_multivi.py +++ 
b/panpipes/python_scripts/batch_correct_multivi.py @@ -57,7 +57,7 @@ sc.settings.autoshow = False sc.settings.figdir = args.figdir - +scvi.settings.seed = 1492 # load parameters threads_available = multiprocessing.cpu_count() From e59921dc68c2f72c7f2c393e37d4b611c583c12d Mon Sep 17 00:00:00 2001 From: bio-la Date: Mon, 4 Mar 2024 10:58:23 +0100 Subject: [PATCH 12/15] check learning rate --- tests/integration_1/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml index eaaffce0..9e42dd81 100644 --- a/tests/integration_1/pipeline.yml +++ b/tests/integration_1/pipeline.yml @@ -258,7 +258,7 @@ multimodal: #(default: 500) max_epochs : 500 #float (default: 0.0001) - lr : 0.0001 + lr : 1.0e-05 #leave blanck for default str | int | bool | None (default: None) use_gpu : # float (default: 0.9) From 4b878b2181cb68474ef7c992784d85f131ae9926 Mon Sep 17 00:00:00 2001 From: Wojciech Lason Date: Mon, 18 Mar 2024 13:52:56 +0000 Subject: [PATCH 13/15] change legend and point size --- .../python_scripts/plot_umaps_batch_correct.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/panpipes/python_scripts/plot_umaps_batch_correct.py b/panpipes/python_scripts/plot_umaps_batch_correct.py index 20e330fb..a1bb2be9 100644 --- a/panpipes/python_scripts/plot_umaps_batch_correct.py +++ b/panpipes/python_scripts/plot_umaps_batch_correct.py @@ -38,6 +38,7 @@ # cmap_choice = "BuPu" # previous default = "viridis" bupu = cm.get_cmap('BuPu', 512) cmap_choice= ListedColormap(bupu(np.linspace(0.1, 1, 256))) +dpi_use = 400 # previous default of 300 made the plots look blurry with high cell numbers (200k+) # load metadata @@ -87,7 +88,7 @@ os.mkdir(os.path.join(args.fig_dir, mod)) L.info("plotting modality: %s" % mod) plt_df = umaps_df[umaps_df['mod'] == mod].copy() - pointsize = 10000 / plt_df.shape[0] + pointsize = 100000 / plt_df.shape[0] plt_df["method"] = 
plt_df["method"].astype("category") # put none at the top of the list if mod != "multimodal": @@ -113,11 +114,11 @@ #plt_df = plt_df.sort_values(by="umap_1") g = sns.FacetGrid(plt_df, col="method", col_wrap=3, sharex=False, sharey=False) g = (g.map(sns.scatterplot, "umap_1", "umap_2", col, s=pointsize, linewidth=0)) - g.add_legend() + plt.legend(loc = 'center left', bbox_to_anchor = (1, 0.5), markerscale = 20) g.savefig(os.path.join(args.fig_dir, mod, "umap_method_" + str(col) + ".png")) fig, ax = batch_scatter_two_var(plt_df, "method", col, palette_choice=palette_choice) if fig is not None: - fig.savefig(os.path.join(args.fig_dir, mod, "umap_method_facet_" + str(col) + ".png"), dpi=300) + fig.savefig(os.path.join(args.fig_dir, mod, "umap_method_facet_" + str(col) + ".png"), dpi=dpi_use) plt.clf() ncats = len(plt_df['method'].unique()) @@ -165,7 +166,7 @@ cbar_ax = fig.add_axes([0.85, 0.35, 0.025, 0.35]) fig.colorbar(im, cbar_ax) fig.suptitle(qc) - plt.savefig(os.path.join(args.fig_dir, mod , "umap_method_" + qc + ".png"), dpi = 300) + plt.savefig(os.path.join(args.fig_dir, mod , "umap_method_" + qc + ".png"), dpi = dpi_use) plt.clf() else: # this is a categorical colored plot @@ -174,10 +175,10 @@ L.info("plotting qc var (categorical) %s"%qc) g = sns.FacetGrid(plt_df, col="method", col_wrap=3, sharex=False, sharey=False) g = (g.map(sns.scatterplot, "umap_1", "umap_2", qc, s=pointsize, linewidth=0)) - g.add_legend() - g.savefig(os.path.join(args.fig_dir, mod, "umap_method_" + qc + ".png"), dpi = 300) + plt.legend(loc = 'center left', bbox_to_anchor = (1, 0.5), markerscale = 20) + g.savefig(os.path.join(args.fig_dir, mod, "umap_method_" + qc + ".png"), dpi = dpi_use) plt.clf() else: - L.info('skipping plot as too many categorys %s' % qc ) + L.info('skipping plot as too many categories %s' % qc ) L.info('done') \ No newline at end of file From 6c94839ecf4e56641ef4f88823a319757917d43a Mon Sep 17 00:00:00 2001 From: bio-la Date: Tue, 19 Mar 2024 16:43:29 +0100 Subject: 
[PATCH 14/15] changed path to file --- .github/workflows/integration01-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/integration01-ci.yml b/.github/workflows/integration01-ci.yml index 2baaf3dc..0dbac9cc 100644 --- a/.github/workflows/integration01-ci.yml +++ b/.github/workflows/integration01-ci.yml @@ -72,7 +72,7 @@ jobs: - name: Edit the submission file run: | cd teaseq/integration - curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml + curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/main/tests/integration_1/pipeline.yml - name: Replace template contents in configuration file run: | From aa031b5c88a3f44b325110f5c03b087e1c806f61 Mon Sep 17 00:00:00 2001 From: bio-la Date: Tue, 19 Mar 2024 16:49:25 +0100 Subject: [PATCH 15/15] removed multivi_env test --- .github/workflows/multiviintegration02-ci.yml | 100 ------------------ 1 file changed, 100 deletions(-) delete mode 100644 .github/workflows/multiviintegration02-ci.yml diff --git a/.github/workflows/multiviintegration02-ci.yml b/.github/workflows/multiviintegration02-ci.yml deleted file mode 100644 index 0645ad6c..00000000 --- a/.github/workflows/multiviintegration02-ci.yml +++ /dev/null @@ -1,100 +0,0 @@ -name: testing environment - -on: - push: - branches: - - main - pull_request: - branches: - - main - -env: - debug: 'true' - -jobs: - integration: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - os: ["ubuntu-latest"] # , "macos-latest", "windows-latest" - python-version: ["3.10"] - - steps: - - uses: actions/checkout@v4 - - - name: File tree - if: env.debug == 'true' - run: tree - - - uses: conda-incubator/setup-miniconda@v3 - with: - miniforge-version: latest - auto-activate-base: true - auto-update-conda: true - channels: conda-forge - channel-priority: strict - activate-environment: pipeline_env - environment-file: pipeline_env.yaml -# important: this 
patch is only to test if multivi integration works -# issues are not related to panpipes https://discourse.scverse.org/t/error-when-training-model-on-m3-max-mps/1896/2 -# https://discourse.scverse.org/t/macbook-m1-m2-mps-acceleration-with-scvi/2075/4 - - name: Install Panpipes - shell: bash -el {0} - run: | - pip install . - conda list - - - name: Conda info - if: env.debug == 'true' - shell: bash -el {0} - run: conda info - - - name: Conda list - if: env.debug == 'true' - shell: pwsh - run: conda list - - # Note: all three files are renamed during the download to trim the "subsample_" prefix - - name: Preparing the data - run: | - mkdir -p teaseq/integration && cd teaseq/integration - curl -L -o teaseq.h5mu https://figshare.com/ndownloader/files/44796985 - - # Note: we run the following to test that the commands works - # However, the following task will replacing the file anyway - - name: Preparing the configuration file - shell: bash -el {0} - run: | - cd teaseq/integration - panpipes integration config - - - name: Edit the submission file - run: | - cd teaseq/integration - curl -o pipeline.yml https://raw.githubusercontent.com/DendrouLab/panpipes/fc_cipy/tests/integration_1/pipeline.yml - - - name: Replace template contents in configuration file - run: | - cd teaseq/integration - sed -i 's+/Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env+pipeline_env+g' pipeline.yml - - - name: File tree - if: env.debug == 'true' - run: tree teaseq - - - name: Review pipeline tasks - shell: bash -el {0} - run: | - cd teaseq/integration - panpipes integration show full --local - - - name: Run pipeline tasks - shell: bash -el {0} - run: | - cd teaseq/integration - panpipes integration make full --local - - - name: File tree - if: env.debug == 'true' - run: tree teaseq