forked from wbaopaul/scATAC-pro
-
Notifications
You must be signed in to change notification settings - Fork 8
/
configure_user.txt
169 lines (124 loc) · 7.08 KB
/
configure_user.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
######################################################################
## provide the user-specific parameters
## ONLY change the value, DO NOT change variable name
######################################################################
## Start editing ##
######################################################################
## global setting ##
######################################################################
OUTPUT_PREFIX = pbmc10k ## prefix for output file name
isSingleEnd = FALSE ## scTHS-seq uses single end sequencing
## annotation files (blacklist, promoters, enhancers, TSS, etc)
BLACKLIST = annotation/hg38_blacklist.bed
PROMOTERS = annotation/hg38_promoter.bed
ENHANCERS = annotation/hg38_enhancer.bed
TSS = annotation/hg38_tss.bed
CHROM_SIZE_FILE = annotation/chrom_hg38.sizes
GENOME_NAME = hg38 ## used for TF motif enrichment and footprinting analysis
plotEPS = TRUE ## print figures in eps format when generating summary report
#####################################
## Adapter trimming ##
#####################################
TRIM_METHOD = trim_galore ## one of trim_galore (default), Trimmomatic or not specified (means DO NOT trim)
## adapter sequence should be specified if Trimmomatic is used to trim
## you don't need to specify this if your TRIM_METHOD is trim_galore or not specified
ADAPTER_SEQ = /mnt/isilon/tan_lab/yuw1/local_tools/Trimmomatic-0.39/adapters/NexteraPE-PE.fa
#########################################################
## mapping ##
## if bwa is selected, you don't
## have to specify options for bowtie/bowtie2, and vice versa
#########################################################
MAPPING_METHOD = bwa ## one of bwa/bowtie/bowtie2
## extra mapping options (NO NEED TO SPECIFY INPUT FASTQS AND OUTPUT FILE/DIRECTORY)
BWA_OPTS = -t 16
BWA_INDEX = hg38_genome/hg38_genome.fa
BOWTIE_OPTS = --quiet -p 16
BOWTIE_INDEX = bowtie-1.2.2/indexes/GRCh38/GRCh38_no_alt
BOWTIE2_OPTS = --end-to-end -p 16
BOWTIE2_INDEX = bowtie2.2.9/indexes/GRCh38/GRCh38
MAPQ = 30 ## filter bam by MAPQ for downstream analysis
CELL_MAP_QC = FALSE ## output mapping stats for cell barcodes
SHIFT_READS_IN_BAM = FALSE ## if TRUE, shift reads in + strand +4bp, in - strand -5bp
#################################################################################
## peak calling
#################################################################################
PEAK_CALLER = MACS2 ## one of MACS2, BIN, and COMBINED
## provide extra options for macs2 (NO NEED TO SPECIFY -t, -n, -f here), like
#MACS2_OPTS = -q 0.05 -g hs
## (change to -g mm if you are using mouse data)
## or something like this to skip building the shifting model
MACS2_OPTS = -q 0.05 -g hs --nomodel --extsize 200 --shift -100
BIN_RESL = 5000 ## bin resolution in bp
########################################################
## cell calling
########################################################
CELL_CALLER = FILTER ## EmptyDrop/cellranger/FILTER
EmptyDrop_FDR = 0.001
# set cutoff to define cell if CELL_CALLER is specified as FILTER
# ignored if CELL_CALLER is set to anything other than FILTER
FILTER_BC_CUTOFF = --min_uniq_frags 2000 --max_uniq_frags 60000 --min_frac_peak 0.5 --min_frac_tss 0.0 --min_frac_promoter 0 --min_frac_enhancer 0.0 --max_frac_mito 0.1 --min_tss_escore 1
##############################################################################
## clustering and dimension reduction
## a seurat object will be created and saved, with embedded information about
## reduced dimensions, normalized data, and
## cluster labels (extract with seurat_obj$active_cluster)
## Note: if the normalization method is tf-idf, then the matrix is presumably set as binary
##############################################################################
norm_by = tf-idf ## or log (just log transformation) or NA
Top_Variable_Features = 5000 ## number/fraction of variable features used for seurat
REDUCTION = pca ## pca/lda, note that UMAP and TSNE will be automatically calculated accordingly
nREDUCTION = 30 ## the reduced dimension
CLUSTERING_METHOD = seurat ## seurat/cisTopic/kmeans/LSI/SCRAT/chromVAR/scABC
K_CLUSTERS = 0.2 ## the number of clusters (as an integer) or the resolution parameter (as a float) for the louvain algorithm (implemented by seurat)
prepCello = TRUE ## generate object for VisCello (for visualization)
rmDoublets = TRUE ## remove potential doublets or not
exptDoubletRate = 0.03 ## default expected doublet rate, can be overwritten in rmDoublets module
#######################################################################################
## differential accessible analysis
## compare two groups
## if group1 is specified as 'one' and group2 as 'rest', then
## all one-vs-rest comparisons will be conducted
#######################################################################################
RUN_DA = TRUE ## run differential analysis or not
group1 = 0:1 ## either one or multiple cluster names, separated by colon, or 'one'
group2 = 2 ## cluster name as above or 'rest'
test_use = wilcox ## one of negbinom, LR, wilcox, t, DESeq2
#######################################################################################
## GO analysis (need to do differential analysis first)
#######################################################################################
RUN_GO = TRUE ## run GO analysis after DA
GO_TYPE = BP ## BP/CC/kegg
#######################################
## split bam by cluster
## and output bw and bedgraph files
#######################################
SPLIT_BAM2CLUSTER = TRUE
##################################################################
## TF footprinting analysis ##
## supports the following types of comparisons, such as
## group1_fp = 0:1, group2_fp = 2 --- cluster0,1 vs cluster2
## group1_fp = one, group2_fp = rest --- all one vs rest
##################################################################
DO_FOOTPRINT = FALSE
## comparing two groups, set similarly as in doing DA
group1_fp = 0:1 ## either a set of cluster names (separated by colon) or 'one'
group2_fp = 2 ## cluster name as above or 'rest'
pvalue_fp = 0.05
############################################################################
## cicero cis-interaction ##
###########################################################################
RUN_Cicero = TRUE
## plot interactions within Cicero_Plot_Region on the summary report
## you can also specify it as a genomic location, a gene name or none (skip the plot)
Cicero_Plot_Region = chr5:140610000-140640000
############################################################################
## about integrate module ##
## integrate two or more samples
## by VFACS, cca (or seurat), rpca, rlsi(or signac), harmony, or pool
############################################################################
Integrate_By = VFACS ## one of VFACS, cca, rpca, rlsi, pool or harmony
mergePeaksWithin = 500 ## merge peaks within 500bp
filterPeaksQvalue = 0.05 ## further filter peaks by qvalue before merging them
nDim4Integration = 30 ## reduced dimensions for integrated data
nFeature4Integration = 5000 ## top variable features selected for integration, not used for rlsi
prepCello4Integration = FALSE ## prepare cello for integrated object