Skip to content

Commit

Permalink
Merge pull request #256 from DendrouLab/clean_clustering
Browse files Browse the repository at this point in the history
Cleaned pipeline.yml for clustering
  • Loading branch information
giuliaelgarcia authored Apr 23, 2024
2 parents 10cff65 + 134911a commit 97fed7d
Showing 1 changed file with 9 additions and 62 deletions.
71 changes: 9 additions & 62 deletions panpipes/panpipes/pipeline_clustering/pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,11 @@
# compute resource options
# ------------------------
resources:
# Number of threads used for parallel jobs
# this must be enough memory to load your mudata and do computationally intensive tasks
threads_high: 2
# this must be enough memory to load your mudata and do computationally light tasks
threads_medium: 2
# this must be enough memory to load text files and do plotting; requires much less memory than the other two
threads_low: 2

fewer_jobs: True
# path to conda env, leave blank if running native or your cluster automatically inherits the login node environment
condaenv:

# --------------------------
Expand All @@ -25,10 +20,6 @@ condaenv:

sample_prefix: mdata
scaled_obj: mdata_scaled.h5mu
# full obj only applicable if you have filtered your scaled object by hvgs
# in this case panpipes will use the full obj to calculate rank_gene_groups and for plotting those genes
# it should contain all the genes you want to include in rank_gene_groups, plus logged_counts as a layer
# if your scaled_obj contains all the genes then leave full_obj blank
full_obj:

# run clustering on each individual modality:
Expand All @@ -41,7 +32,6 @@ modalities:
# if True, will look for WNN, or totalVI output
multimodal:
run_clustering: True
# options: WNN, mofa, totalVI; this tells the pipeline which integration output to look for
integration_method:


Expand All @@ -50,68 +40,41 @@ multimodal:
# ---------------------------------------
#
# -----------------------------
# number of neighbors to use when calculating the graph for clustering and umap.
neighbors:
rna:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
prot:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
atac:
use_existing: True
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_lsi/X_pca and X_lsi/X_pca not in .obsm, will be computed with default parameters
dim_red: X_lsi
# if dim_red=X_lsi/X_pca and X_lsi/X_pca not in .obsm, which dimension to remove
dim_remove: 1
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy
spatial:
use_existing: False
# which representation in .obsm to use for nearest neighbors
# if dim_red=X_pca and X_pca not in .obsm, will be computed with default parameters
dim_red: X_pca
#how many components to use for clustering
n_dim_red: 30
# number of neighbours
k: 30
# metric: euclidean | cosine
metric: euclidean
# scanpy | hnsw (from scvelo)
method: scanpy

# ---------------------------------------
# parameters for umap calculation
# ---------------------------------------
umap:
# set run to False if you are happy with the existing umap from integration
run: True
rna:
mindist:
Expand Down Expand Up @@ -141,19 +104,19 @@ clusterspecs:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
prot:
resolutions:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
atac:
resolutions:
- 0.2
- 0.6
- 1
algorithm: leiden # (louvain or leiden)
algorithm: leiden
multimodal:
resolutions:
- 0.5
Expand All @@ -168,31 +131,21 @@ clusterspecs:
# ---------------------------------------
# parameters for finding marker genes
# ---------------------------------------
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)

# args https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
markerspecs:
rna:
run: True
layer: logged_counts
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: t-test_overestim_var
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)
mincells: 10
pseudo_seurat: False
# these next two settings do not matter unless pseudo_seurat is set to True,
# If applicable look at Seurat documentation for FindMarkers for details
minpct: 0.1
threshuse: 0.25

prot:
run:
layer: clr #options clr,dsb
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
layer: clr
mincells: 10
method: wilcoxon
pseudo_seurat: False
minpct: 0.1
Expand All @@ -202,7 +155,6 @@ markerspecs:
run:
layer: logged_counts # options: logged_counts, signac_norm, logTF_norm, logIDF_norm
mincells: 10
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: wilcoxon
pseudo_seurat: False
minpct: 0.1
Expand All @@ -217,17 +169,12 @@ markerspecs:
threshuse: 0.25

spatial:
# args for spatial https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
run: True
layer: norm_pearson_resid
# method options: ['logreg', 't-test', 'wilcoxon', 't-test_overestim_var']
method: t-test_overestim_var
mincells: 10 # if a cluster contains less than n cells then do not bother doing marker analysis
# where pseudo_seurat is set to False
# we run https://scanpy.readthedocs.io/en/stable/generated/scanpy.tl.rank_genes_groups.html
# where pseudo_seurat is set to True, we run a python implementation of Seurat::FindMarkers (written by CRG)
mincells: 10
pseudo_seurat: False
# these next two settings do not matter unless pseudo_seurat is set to True,
# If applicable look at Seurat documentation for FindMarkers for details
minpct: 0.1
threshuse: 0.25

Expand Down

0 comments on commit 97fed7d

Please sign in to comment.