diff --git a/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_annotation.csv b/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_annotation.csv new file mode 100644 index 0000000..45c347a --- /dev/null +++ b/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_annotation.csv @@ -0,0 +1,2 @@ +name,data,assay,metadata,control +Lee2020NatGenet_cellTypes,test/data/Lee2020NatGenet/object.rds,SCT,cell_type,ALL \ No newline at end of file diff --git a/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_config.yaml b/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_config.yaml new file mode 100644 index 0000000..cf7214f --- /dev/null +++ b/test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_config.yaml @@ -0,0 +1,50 @@ + +##### RESOURCES ##### +mem: '32000' +threads: 1 # only DEA rule is multicore and gets 8*threads +partition: 'shortq' + +##### GENERAL ##### +annotation: test/config/Lee2020NatGenet/Lee2020NatGenet_dea_seurat_annotation.csv +result_path: test/results/Lee2020NatGenet/ +project_name: Lee2020NatGenet + +##### DEA ##### + +# parameters for https://satijalab.org/seurat/reference/findmarkers +logfc_threshold: 0.1 +test_use: "wilcox" +min_pct: 0.1 + +# additional parameter for https://satijalab.org/seurat/reference/findallmarkers +return_thresh: 1 + +##### AGGREGATION ##### +# score_formula is used to calculate a score for each gene and comparison (group) that can be used downstream e.g., for preranked GSEA +# eval based score calculation -> eval(parse(text=" ")) +# DEA result dataframe is called: dea_results +# available numerical columns: p_val, avg_log2FC, pct.1, pct.2, p_val_adj +# common/popular example: "-log10(dea_results$p_val)*sign(dea_results$avg_log2FC)" +# if not used leave empty: "" +score_formula: "-log10(dea_results$p_val)*sign(dea_results$avg_log2FC)" + +# filters are applied to the DEA results and used for all outputs +filters: + adj_pval: 0.05 + lfc: 0.1 + min_pct: 0.1 + +##### VISUALIZATION ##### + +# parameters for 
https://bioconductor.org/packages/release/bioc/html/EnhancedVolcano.html +volcano: + pCutoff: 0.05 + FCcutoff: 0.1 + +# path(s) to feature lists as plain text files (.txt) with one gene per line. +# used to highlight features in volcano plots and generate LFC clustered heatmaps. +# only use camelCase for the feature_list names like in the examples below. +# if not used: put an empty entry e.g., noGenes: "" +feature_lists: + CellTypeMarkers: "test/data/gene_lists/CellTypeMarkers.txt" + Epithelial: "test/data/gene_lists/Epithelial.txt" \ No newline at end of file diff --git a/test/data/gene_lists/CellTypeMarkers.txt b/test/data/gene_lists/CellTypeMarkers.txt new file mode 100644 index 0000000..1354a5a --- /dev/null +++ b/test/data/gene_lists/CellTypeMarkers.txt @@ -0,0 +1,12 @@ +KRT19 +EPCAM +CDH5 +VWF +COL1A2 +COL1A1 +CD79A +CD14 +AIF1 +CD3D +CD2 +PTPRC \ No newline at end of file diff --git a/test/data/gene_lists/Epithelial.txt b/test/data/gene_lists/Epithelial.txt new file mode 100644 index 0000000..7303f02 --- /dev/null +++ b/test/data/gene_lists/Epithelial.txt @@ -0,0 +1,10 @@ +EPCAM +CDH1 +KRT20 +KRT19 +OCLN +CLDN7 +MUC2 +CEACAM5 +CDX2 +VIL1 \ No newline at end of file