From ab93d9b75d17596d08f4254bb4908614cb420b62 Mon Sep 17 00:00:00 2001 From: bio-la Date: Tue, 19 Mar 2024 16:17:34 +0100 Subject: [PATCH 1/2] adding integration pipelines --- tests/integration_1/pipeline.yml | 390 +++++++++++++++++++++++++++++++ tests/integration_2/pipeline.yml | 390 +++++++++++++++++++++++++++++++ 2 files changed, 780 insertions(+) create mode 100644 tests/integration_1/pipeline.yml create mode 100644 tests/integration_2/pipeline.yml diff --git a/tests/integration_1/pipeline.yml b/tests/integration_1/pipeline.yml new file mode 100644 index 00000000..9e42dd81 --- /dev/null +++ b/tests/integration_1/pipeline.yml @@ -0,0 +1,390 @@ +# Pipeline pipeline_integration.py configuration file +# ============================================== + +# compute resource options +# ------------------------ +resources: + # Number of threads used for parallel jobs + # this must be enough memory to load your mudata and do computationally intensive tasks + threads_high: 1 + # this must be enough memory to load your mudata and do computationally light tasks + threads_medium: 1 + # this must be enough memory to load text files and do plotting, requires much less memory than the other two + threads_low: 1 + # if you access to a gpu-specific queue, how many gpu threads to request, make sure to edit the queues section below, + # so that panpipes can find your gpu queue + threads_gpu: 2 +# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment +condaenv: /Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env + +# allows for tweaking which queues jobs get submitted to, +# in case there is a special queue for long jobs or you have access to a gpu-specific queue +# the default queue should be specified in your .cgat.yml file +# leave blank if you do not want to use the alternative queues +queues: + long: + gpu: + +# Start +# -------------------------- +# either one that exists already with +sample_prefix: 
teaseq +#this is what comes out of the filtering/preprocessing +preprocessed_obj: teaseq.h5mu +# contains layers: raw_counts, logged_counts, and has scaled or logged counts in X + + +#-------------------------- +# Batch correction +# ------------------------- +# unimodal: correct each modality independently +rna: + # True or false depending on whether you want to run batch correction + run: False + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no spaces) + tools: harmony,scvi,bbknn + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneosly. + # Specifically all columns specified will be merged into one 'batch' columns. + # if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... + column: dataset + #----------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #----------------------------- + # SCVI args + #----------------------------- + scvi: + exclude_mt_genes: True + mt_column: mt + model_args: + n_layers: + n_latent: + gene_likelihood: zinb + training_args: + max_epochs: 400 + train_size: 0.9 + early_stopping: True + training_plan: + lr: 0.001 + n_epochs_kl_warmup: 400 + reduce_lr_on_plateau: True + lr_scheduler_metric: + lr_patience: 8 + lr_factor: 0.1 + #---------------------------- + # find neighbour parameters + #----------------------------- + # to reuse these params, (for example for WNN) please use anchors (&) and scalars (*) in the 
relevant place + # i.e. &rna_neighbors will be called by *rna_neighbors where referenced + neighbors: &rna_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # the maximum number of dims for neighbors calculation can only be lower or equal to the total number of dims for PCA or Harmony + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + +#-------------------------- +prot: + # True or false depending on whether you want to run batch correction + run: False + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,combat + tools: harmony + # this is the column you want to batch correct on. if you specify a comma separated list (no spaces), + # they will be all used simultaneously. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: orig.ident + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #---------------------------- + # find neighbour parameters + #----------------------------- + neighbors: &prot_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#-------------------------- +atac: + # True or false depending on whether you want to run batch correction + run: True + # which dimensionality reduction to expect, LSI or PCA + dimred: LSI + # what method(s) to use to run batch correction, you can specify multiple + # (comma-separated string, no spaces) + # choices: harmony,bbknn,combat + tools: harmony,bbknn + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneously. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: dataset + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 30 + #---------------------------- + # find neighbour parameters + #----------------------------- + neighbors: &atac_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#---------------------------------------------- +# multimodal integration +# remember to specify knn graph params in the section "neighbors" +#---------------------------------------------- +multimodal: + # True or false depending on whether you want to run batch correction + run: True + # what method(s) to use to run batch correction, you can specify multiple + # choices: totalvi, mofa, MultiVI, WNN + # list e.g. below + tools: + - MultiVI + + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneously. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column_continuous: + column_categorical: dataset + # extra params: + totalvi: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + + # totalvi will run on rna and prot + modalities: rna,prot + exclude_mt_genes: True + mt_column: mt + # to filter outliers manually create a column called adt_outliers in mdata['prot'].obs + filter_by_hvg: True + filter_prot_outliers: False + model_args: + latent_distribution: "normal" + training_args: + max_epochs: 100 + train_size: 0.9 + early_stopping: True + training_plan: None + MultiVI: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # leave arguments blank for default + lowmem: True + # Set lowmem to True will subset the atac to the top 25k HVF. + # This is to deal with concatenation of atac,rna on large datasets which at the moment is suboptimally required by scvitools. 
+ # >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac) + model_args: + # (default: None) + n_hidden : + # (default: None) + n_latent : + #(bool,default: True) + region_factors : True + #{‘normal’, ‘ln’} (default: 'normal') + latent_distribution : 'normal' + #(bool,default: False) + deeply_inject_covariates : False + #(bool, default: False) + fully_paired : False + training_args: + #(default: 500) + max_epochs : 500 + #float (default: 0.0001) + lr : 1.0e-05 + #leave blank for default str | int | bool | None (default: None) + use_gpu : + # float (default: 0.9) + train_size : 0.9 + # leave blank for default, float | None (default: None) + validation_size : + # int (default: 128) + batch_size : 128 + #float (default: 0.001) + weight_decay : 0.001 + #float (default: 1.0e-08) + eps : 1.0e-08 + #bool (default: True) + early_stopping : True + #bool (default: True) + save_best : True + #leave blank for default int | None (default: None) + check_val_every_n_epoch : + #leave blank for default int | None (default: None) + n_steps_kl_warmup : + # int | None (default: 50) + n_epochs_kl_warmup : 50 + #bool (default: True) + adversarial_mixing : True + #leave blank for default dict | None (default: None) + training_plan : + mofa: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # (comma-separated string, no spaces) + modalities: + filter_by_hvg: True + n_factors: 10 + n_iterations: 1000 + #pick one among fast, medium, slow + convergence_mode: fast + save_parameters: False + #if save_parameters True, set the following, otherwise leave blank + outfile: + WNN: + # muon implementation of WNN + modalities: rna,prot,atac + # run wnn on batch corrected unimodal data, set each of the modalities you want to use to calc WNN to ONE method. 
+ # leave to None and it will default to de novo calculation of neighbours on non corrected data for that modality using specified params + batch_corrected: + # options are: "bbknn", "scVI", "harmony", "scanorama" + rna: None + # options are "harmony", "bbknn" + prot: None + # options are "harmony" + atac: None + # please use anchors (&) and aliases (*) in the relevant place + # i.e. &rna_neighbors will be called by *rna_neighbors where referenced + knn: + rna: *rna_neighbors + prot: *prot_neighbors + atac: *atac_neighbors + #WNN has its own neighbors search, specify here + n_neighbors: #leave blank and it will default to arithmetic mean across modalities neighbors + n_bandwidth_neighbors: 20 + n_multineighbors: 200 + metric: 'euclidean' + low_memory: True + + ### + # neighbours knn calculation for multimodal analysis. + ### + neighbors: + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) 
+ # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + + + +#----------------------------- +# Plot +#----------------------------- +plotqc: + # grouping var must be a categorical variable, + # (comma-separated strings, no spaces) + # umaps comparing the integration (one plot per value in the group) + # for each batch correction column plus any extras in grouping var + grouping_var: dataset,sample_id + # what other metrics do you want to plot on each modality's embedding, (one plot per group) + # use mod:variable notation, + # any metrics that you want to plot on all modality umaps go under "all" + # these can be categorical or numeric + all: rep:receptor_subtype + rna: rna:total_counts + prot: prot:total_counts + atac: + multimodal: rna:total_counts + # if you want to add any additional plots, just remove the log file logs/plot_batch_corrected_umaps.log + # and run panpipes integration make plot_umaps + +# ---------------- +# Make final object +# ---------------- +# Final choices: Leave blank until you have reviewed the results from running +# panpipes integration make full +# This step will produce a mudata object with one layer per modality with +# one correction per modality and one multimodal layer. 
+# Choose the integration results you want to merge in the final object +# For unimodal integration: to pick the uncorrected version use "no_correction" +# then run +# panpipes integration make merge_integration +final_obj: + rna: + include: True + bc_choice: no_correction + prot: + include: True + bc_choice: harmony + atac: + include: False + bc_choice: bbknn + multimodal: + include: True + bc_choice: WNN + diff --git a/tests/integration_2/pipeline.yml b/tests/integration_2/pipeline.yml new file mode 100644 index 00000000..136f5961 --- /dev/null +++ b/tests/integration_2/pipeline.yml @@ -0,0 +1,390 @@ +# Pipeline pipeline_integration.py configuration file +# ============================================== + +# compute resource options +# ------------------------ +resources: + # Number of threads used for parallel jobs + # this must be enough memory to load your mudata and do computationally intensive tasks + threads_high: 1 + # this must be enough memory to load your mudata and do computationally light tasks + threads_medium: 1 + # this must be enough memory to load text files and do plotting, requires much less memory than the other two + threads_low: 1 + # if you access to a gpu-specific queue, how many gpu threads to request, make sure to edit the queues section below, + # so that panpipes can find your gpu queue + threads_gpu: 2 +# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment +condaenv: /Users/fabiola.curion/Documents/devel/miniconda3/envs/pipeline_env + +# allows for tweaking which queues jobs get submitted to, +# in case there is a special queue for long jobs or you have access to a gpu-specific queue +# the default queue should be specified in your .cgat.yml file +# leave blank if you do not want to use the alternative queues +queues: + long: + gpu: + +# Start +# -------------------------- +# either one that exists already with +sample_prefix: teaseq +#this is what comes out of the 
filtering/preprocessing +preprocessed_obj: teaseq.h5mu +# contains layers: raw_counts, logged_counts, and has scaled or logged counts in X + + +#-------------------------- +# Batch correction +# ------------------------- +# unimodal: correct each modality independently +rna: + # True or false depending on whether you want to run batch correction + run: False + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no spaces) + tools: harmony,scvi + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneosly. + # Specifically all columns specified will be merged into one 'batch' columns. + # if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... + column: dataset + #----------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #----------------------------- + # SCVI args + #----------------------------- + scvi: + exclude_mt_genes: True + mt_column: mt + model_args: + n_layers: + n_latent: + gene_likelihood: zinb + training_args: + max_epochs: 40 + train_size: 0.9 + early_stopping: True + training_plan: + lr: 0.001 + n_epochs_kl_warmup: 40 + reduce_lr_on_plateau: True + lr_scheduler_metric: + lr_patience: 8 + lr_factor: 0.1 + #---------------------------- + # find neighbour parameters + #----------------------------- + # to reuse these params, (for example for WNN) please use anchors (&) and scalars (*) in the relevant place + # i.e. 
&rna_neighbors will be called by *rna_neighbors where referenced + neighbors: &rna_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # the maximum number of dims for neighbors calculation can only only be lower or equal to the total number of dims for PCA or Harmony + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + +#-------------------------- +prot: + # True or false depending on whether you want to run batch correction + run: False + # what method(s) to use to run batch correction, you can specify multiple + # choices: harmony,bbknn,combat + tools: harmony + # this is the column you want to batch correct on. if you specify a comma separated list (no spaces), + # they will be all used simultaneosly. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: orig.ident + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 20 + #---------------------------- + # find neighbour parameters + #----------------------------- + neighbors: &prot_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#-------------------------- +atac: + # True or false depending on whether you want to run batch correction + run: True + # which dimensionality reduction to expect, LSI or PCA + dimred: LSI + # what method(s) to use to run batch correction, you can specify multiple + # (comma-separated string, no spaces) + # choices: harmony,bbknn,combat + tools: harmony,bbknn + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneously. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column: dataset + #---------------------------- + # Harmony args + #----------------------------- + harmony: + # sigma value, used by Harmony + sigma: 0.1 + # theta value used by Harmony, default is 1 + theta: 1.0 + # number of pcs, used by Harmony + npcs: 30 + #---------------------------- + # BBKNN args # https://bbknn.readthedocs.io/en/latest/ + #----------------------------- + bbknn: + neighbors_within_batch: 30 + #---------------------------- + # find neighbour parameters + #----------------------------- + neighbors: &atac_neighbors + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) + # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy +#---------------------------------------------- +# multimodal integration +# remember to specify knn graph params in the section "neighbors" +#---------------------------------------------- +multimodal: + # True or false depending on whether you want to run batch correction + run: True + # what method(s) to use to run batch correction, you can specify multiple + # choices: totalvi, mofa, MultiVI, WNN + # list e.g. below + tools: + - totalvi + + # this is the column you want to batch correct on. if you specify a comma separated list, + # they will be all used simultaneously. if you want to test correction for one at a time, + # specify one at a time and run the pipeline in different folders i.e. integration_by_sample, + # integration_by_tissue ... 
+ column_continuous: + column_categorical: dataset + # extra params: + totalvi: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + + # totalvi will run on rna and prot + modalities: rna,prot + exclude_mt_genes: True + mt_column: mt + # to filter outliers manually create a column called adt_outliers in mdata['prot'].obs + filter_by_hvg: True + filter_prot_outliers: False + model_args: + latent_distribution: "normal" + training_args: + max_epochs: 100 + train_size: 0.9 + early_stopping: True + training_plan: None + MultiVI: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # leave arguments blank for default + lowmem: True + # Set lowmem to True will subset the atac to the top 25k HVF. + # This is to deal with concatenation of atac,rna on large datasets which at the moment is suboptimally required by scvitools. 
+ # >100GB of RAM are required to concatenate atac,rna with 15k cells and 120k total features (union rna,atac) + model_args: + # (default: None) + n_hidden : + # (default: None) + n_latent : + #(bool,default: True) + region_factors : True + #{‘normal’, ‘ln’} (default: 'normal') + latent_distribution : 'normal' + #(bool,default: False) + deeply_inject_covariates : False + #(bool, default: False) + fully_paired : False + training_args: + #(default: 500) + max_epochs : 500 + #float (default: 0.0001) + lr : 1.0e-05 + #leave blank for default str | int | bool | None (default: None) + use_gpu : + # float (default: 0.9) + train_size : 0.9 + # leave blank for default, float | None (default: None) + validation_size : + # int (default: 128) + batch_size : 128 + #float (default: 0.001) + weight_decay : 0.001 + #float (default: 1.0e-08) + eps : 1.0e-08 + #bool (default: True) + early_stopping : True + #bool (default: True) + save_best : True + #leave blank for default int | None (default: None) + check_val_every_n_epoch : + #leave blank for default int | None (default: None) + n_steps_kl_warmup : + # int | None (default: 50) + n_epochs_kl_warmup : 50 + #bool (default: True) + adversarial_mixing : True + #leave blank for default dict | None (default: None) + training_plan : + mofa: + # this is a minimal set of parameters that will be expected + # you can add any other param from the tutorials and they will + # be parsed alongside the others + # (comma-separated string, no spaces) + modalities: + filter_by_hvg: True + n_factors: 10 + n_iterations: 1000 + #pick one among fast, medium, slow + convergence_mode: fast + save_parameters: False + #if save_parameters True, set the following, otherwise leave blank + outfile: + WNN: + # muon implementation of WNN + modalities: rna,prot,atac + # run wnn on batch corrected unimodal data, set each of the modalities you want to use to calc WNN to ONE method. 
+ # leave to None and it will default to de novo calculation of neighbours on non corrected data for that modality using specified params + batch_corrected: + # options are: "bbknn", "scVI", "harmony", "scanorama" + rna: None + # options are "harmony", "bbknn" + prot: None + # options are "harmony" + atac: None + # please use anchors (&) and aliases (*) in the relevant place + # i.e. &rna_neighbors will be called by *rna_neighbors where referenced + knn: + rna: *rna_neighbors + prot: *prot_neighbors + atac: *atac_neighbors + #WNN has its own neighbors search, specify here + n_neighbors: #leave blank and it will default to arithmetic mean across modalities neighbors + n_bandwidth_neighbors: 20 + n_multineighbors: 200 + metric: 'euclidean' + low_memory: True + + ### + # neighbours knn calculation for multimodal analysis. + ### + neighbors: + # number of Principal Components to calculate for neighbours and umap: + # -if no correction is applied, PCA will be calculated and used to run UMAP and clustering on + # -if Harmony is the method of choice, it will use these components to create a corrected dim red.) 
+ # note: scvelo default is 30 + npcs: 30 + # number of neighbours + k: 30 + # metric: euclidean | cosine + metric: euclidean + # scanpy | hnsw (from scvelo) + method: scanpy + + + +#----------------------------- +# Plot +#----------------------------- +plotqc: + # grouping var must be a categorical variable, + # (comma-separated strings, no spaces) + # umaps comparing the integration (one plot per value in the group) + # for each batch correction column plus any extras in grouping var + grouping_var: dataset,sample_id + # what other metrics do you want to plot on each modality's embedding, (one plot per group) + # use mod:variable notation, + # any metrics that you want to plot on all modality umaps go under "all" + # these can be categorical or numeric + all: rep:receptor_subtype + rna: rna:total_counts + prot: prot:total_counts + atac: + multimodal: rna:total_counts + # if you want to add any additional plots, just remove the log file logs/plot_batch_corrected_umaps.log + # and run panpipes integration make plot_umaps + +# ---------------- +# Make final object +# ---------------- +# Final choices: Leave blank until you have reviewed the results from running +# panpipes integration make full +# This step will produce a mudata object with one layer per modality with +# one correction per modality and one multimodal layer. 
+# Choose the integration results you want to merge in the final object +# For unimodal integration: to pick the uncorrected version use "no_correction" +# then run +# panpipes integration make merge_integration +final_obj: + rna: + include: True + bc_choice: no_correction + prot: + include: True + bc_choice: harmony + atac: + include: False + bc_choice: bbknn + multimodal: + include: True + bc_choice: WNN + From fc8f20bdfeeb94763ace6de28c3e0226576a6a73 Mon Sep 17 00:00:00 2001 From: bio-la Date: Tue, 19 Mar 2024 16:24:00 +0100 Subject: [PATCH 2/2] fixed pipeline --- tests/integration_2/pipeline.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integration_2/pipeline.yml b/tests/integration_2/pipeline.yml index 136f5961..ce1a95b7 100644 --- a/tests/integration_2/pipeline.yml +++ b/tests/integration_2/pipeline.yml @@ -40,7 +40,7 @@ preprocessed_obj: teaseq.h5mu # unimodal: correct each modality independently rna: # True or false depending on whether you want to run batch correction - run: False + run: True # what method(s) to use to run batch correction, you can specify multiple # choices: harmony,bbknn,scanorama,scvi (comma-seprated string, no spaces) tools: harmony,scvi @@ -151,7 +151,7 @@ prot: #-------------------------- atac: # True or false depending on whether you want to run batch correction - run: True + run: False # which dimensionality reduction to expect, LSI or PCA dimred: LSI # what method(s) to use to run batch correction, you can specify multiple @@ -228,7 +228,7 @@ multimodal: model_args: latent_distribution: "normal" training_args: - max_epochs: 100 + max_epochs: 40 train_size: 0.9 early_stopping: True training_plan: None