Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleaned pipeline.yml for clustering #256

Merged
merged 2 commits into from
Apr 23, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 9 additions & 62 deletions panpipes/panpipes/pipeline_clustering/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,11 @@
# compute resource options
# ------------------------
resources:
# Number of threads used for parallel jobs
# this must be enough memory to load your mudata and do computationally intensive tasks
threads_high: 2
# this must be enough memory to load your mudata and do computationally light tasks
threads_medium: 2
# this must be enough memory to load text files and do plotting; requires much less memory than the other two
threads_low: 2

fewer_jobs: True
# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:

# --------------------------
Expand All @@ -25,10 +20,6 @@ condaenv:

sample_prefix: mdata
scaled_obj: mdata_scaled.h5mu
# full obj only applicable if you have filtered your scaled object by hvgs
# in this case panpipes will use the full obj to calculate rank_gene_groups and for plotting those genes
# it should contain all the genes you want to include in rank_gene_groups, plus logged_counts as a layer
# if your scaled_obj contains all the genes then leave full_obj blank
full_obj:

# run clustering on each individual modality:
Expand All @@ -41,7 +32,6 @@ modalities:
# if True, will look for WNN, or totalVI output
multimodal:
run_clustering: True
# options: WNN, mofa, totalVI. This tells us which integration output to look for
integration_method:


Expand All @@ -50,68 +40,41 @@ multimodal:
# ---------------------------------------
#
# -----------------------------
# number of neighbors to use when calculating the graph for clustering and umap.
neighbors:
rna:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
prot:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
atac:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_lsi/X_pca and X_lsi/X_pca not in .obsm, will be computed with default parameters
dim_red: X_lsi
# if dim_red=X_lsi/X_pca and X_lsi/X_pca not in .obsm, which dimension to remove
dim_remove: 1
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
spatial:
use_existing: False
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy

# ---------------------------------------
# parameters for umap calculation
# ---------------------------------------
umap:
# set run to False if you are happy with the existing umap from integration
run: True
rna:
mindist:
Expand Down Expand Up @@ -141,19 +104,19 @@ clusterspecs:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
prot:
resolutions:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
atac:
resolutions:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
multimodal:
resolutions:
- 0.5
Expand All @@ -168,31 +131,21 @@ clusterspecs:
# ---------------------------------------
# parameters for finding marker genes
# ---------------------------------------
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)

# args https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
markerspecs:
rna:
run: True
layer: logged_counts
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: t-test_overestim_var
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)
mincells: 10
pseudo_seurat: False
# these next two settings do not matter unless pseudo_seurat is set to True,
# If applicable look at Seurat documentation for FindMarkers for details
minpct: 0.1
threshuse: 0.25

prot:
run:
layer: clr #options clr,dsb
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
layer: clr
mincells: 10
method: wilcoxon
pseudo_seurat: False
minpct: 0.1
Expand All @@ -202,7 +155,6 @@ markerspecs:
run:
layer: logged_counts #options logged_counts, signac_norm , logTF_norm,logIDF_norm
mincells: 10
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: wilcoxon
pseudo_seurat: False
minpct: 0.1
Expand All @@ -217,17 +169,12 @@ markerspecs:
threshuse: 0.25

spatial:
# args for spatial https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
run: True
layer: norm_pearson_resid
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: t-test_overestim_var
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)
mincells: 10
pseudo_seurat: False
# these next two settings do not matter unless pseudo_seurat is set to True,
# If applicable look at Seurat documentation for FindMarkers for details
minpct: 0.1
threshuse: 0.25

Expand Down