Skip to content

Commit

Permalink
add example config and metadata
Browse files Browse the repository at this point in the history
  • Loading branch information
sreichl committed Feb 21, 2024
1 parent e974d6f commit 7d6eb5a
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
name,data,assay,metadata,control
Lee2020NatGenet_cellTypes,test/data/Lee2020NatGenet/object.rds,SCT,cell_type,ALL
50 changes: 50 additions & 0 deletions test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

##### RESOURCES #####
# memory request; kept as a quoted string exactly as in the original config
mem: '32000'
threads: 1  # only DEA rule is multicore and gets 8*threads
partition: 'shortq'

##### GENERAL #####
# annotation sheet (CSV) with the columns: name, data, assay, metadata, control
annotation: test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_annotation.csv
result_path: test/results/Lee2020NatGenet/
project_name: Lee2020NatGenet

##### DEA #####

# parameters for https://satijalab.org/seurat/reference/findmarkers
logfc_threshold: 0.1
test_use: "wilcox"
min_pct: 0.1

# additional parameter for https://satijalab.org/seurat/reference/findallmarkers
return_thresh: 1

##### AGGREGATION #####
# score_formula is used to calculate a score for each gene and comparison
# (group) that can be used downstream, e.g., for preranked GSEA
# eval based score calculation -> eval(parse(text=" "))
# DEA result dataframe is called: dea_results
# available numerical columns: p_val, avg_log2FC, pct.1, pct.2, p_val_adj
# common/popular example: "-log10(dea_results$p_val)*sign(dea_results$avg_log2FC)"
# if not used leave empty: ""
score_formula: "-log10(dea_results$p_val)*sign(dea_results$avg_log2FC)"

# filters are applied to the DEA results and used for all outputs
# NOTE: these keys must stay nested under `filters`; at top level `min_pct`
# would duplicate the DEA parameter of the same name and `filters` would be null
filters:
  adj_pval: 0.05
  lfc: 0.1
  min_pct: 0.1

##### VISUALIZATION #####

# parameters for https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html
volcano:
  pCutoff: 0.05
  FCcutoff: 0.1

# path(s) to feature lists as plain text files (.txt) with one gene per line.
# used to highlight features in volcano plots and generate LFC clustered heatmaps.
# only use camelCase for the feature_list names like in the examples below.
# if not used: put an empty entry e.g., noGenes: ""
feature_lists:
  CellTypeMarkers: "test/data/gene_lists/CellTypeMarkers.txt"
  Epithelial: "test/data/gene_lists/Epithelial.txt"
12 changes: 12 additions & 0 deletions test/data/gene_lists/CellTypeMarkers.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
KRT19
EPCAM
CDH5
VWF
COL1A2
COL1A1
CD79A
CD14
AIF1
CD3D
CD2
PTPRC
10 changes: 10 additions & 0 deletions test/data/gene_lists/Epithelial.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
EPCAM
CDH1
KRT20
KRT19
OCLN
CLDN7
MUC2
CEACAM5
CDX2
VIL1

0 comments on commit 7d6eb5a

Please sign in to comment.