-
Notifications
You must be signed in to change notification settings - Fork 0
/
Dockstore.cwl
223 lines (219 loc) · 7.88 KB
/
Dockstore.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env cwl-runner
# CWL description of a command-line tool that runs inside the Docker image
# named under `requirements`.
class: CommandLineTool

# Declare the vocabulary prefixes used by the metadata fields below so that
# `dct:creator` / `foaf:*` resolve to full IRIs instead of being unknown fields.
$namespaces:
  dct: http://purl.org/dc/terms/
  foaf: http://xmlns.com/foaf/0.1/

# Authorship metadata.
dct:creator:
  foaf:name: Andy Yang
  # NOTE(review): the value below looks like an e-mail-obfuscation placeholder
  # left behind by the web page this file was copied from; restore the real
  # mailto: address before publishing.
  foaf:mbox: mailto:[email protected]

cwlVersion: v1.0

requirements:
# `dockerPull` belongs to the DockerRequirement entry, so it is nested under
# the same list item as `class`.
- class: DockerRequirement
  dockerPull: quay.io/cancercollaboratory/dockstore-tool-rnaseqc:1.0
# Tool inputs. Each entry is one command-line option: `prefix` is the literal
# flag and `position` is its sort key on the generated command line.
inputs:
  rRNA:
    type: File?
    inputBinding:
      position: 8
      prefix: -rRNA
    doc: intervalFile for rRNA loci (must end in .list). This is an alternative flag
      to the -BWArRNA flag.
  strat:
    type: string?
    inputBinding:
      position: 8
      prefix: -strat
    doc: 'Stratification options: current supported option is ''gc'''
  # NOTE(review): declared as a boolean flag here, but the tool-level usage
  # text shows `-strictMode <arg>` — confirm whether a value is expected.
  strictMode:
    type: boolean?
    inputBinding:
      position: 8
      prefix: -strictMode
    doc: When counting reads per exon or generating RPKMs, reads will be filtered
      out that have a mapping quality of zero, more than 6 non-reference bases or
      improper pairs.
  gc:
    type: File?
    inputBinding:
      position: 7
      prefix: -gc
    doc: File of transcript id <tab> gc content. Used for stratification.
  # Required string with no inputBinding, so it is not placed on the command
  # line; the `out` output uses it as its glob pattern.
  out: string
  gatkFlags:
    type: string?
    inputBinding:
      position: 8
      prefix: -gatkFlags
    doc: Pass a string of quotes directly to the GATK (e.g. -gatkFlags "-DBQ 0" to
      set missing base qualities to zero).
  # NOTE(review): typed File?, but the tool-level usage text lists
  # `-transcriptDetails` as a bare flag with no argument — confirm the type.
  transcriptDetails:
    type: File?
    inputBinding:
      position: 8
      prefix: -transcriptDetails
    doc: Provide an HTML report for each transcript.
  # NOTE(review): typed int? and the only prefix written with a double dash;
  # the tool-level usage text lists `-noReadCounting` as a bare flag — confirm
  # the intended type and prefix.
  noReadCounting:
    type: int?
    inputBinding:
      position: 8
      prefix: --noReadCounting
    doc: Suppresses read count-based metrics.
  ttype:
    type: int?
    inputBinding:
      position: 8
      prefix: -ttype
    doc: The column in GTF to use to look for rRNA transcript type. Mainly used for
      running on Ensembl GTF (specify "-ttype 2"). Otherwise, for spec-conforming
      GTF files, disregard.
  rRNAdSampleTarget:
    type: int?
    inputBinding:
      position: 8
      prefix: -rRNAdSampleTarget
    doc: Downsamples to calculate rRNA rate more efficiently. Default is 1 million.
      Set to 0 to disable.
  noDoC:
    type: boolean?
    inputBinding:
      position: 8
      prefix: -noDoC
    doc: Suppresses GATK Depth of Coverage calculations.
  bwa:
    type: File?
    inputBinding:
      position: 1
      prefix: -bwa
    doc: Path to BWA, which should be set if it's not in your path and BWArRNA is
      used.
  # NOTE(review): typed string? although the doc describes a GCT file —
  # a string path is not staged into the container; confirm.
  corr:
    type: string?
    inputBinding:
      position: 3
      prefix: -corr
    doc: GCT file for expression correlation comparison. Note, that the values must
      be log normalized, and the identifiers must match those of the GTF file.
  # NOTE(review): typed string? although the doc says the value is a reference
  # fasta — a string path is not staged into the container; confirm.
  BWArRNA:
    type: string?
    inputBinding:
      position: 2
      prefix: -BWArRNA
    doc: Use an on the fly BWA alignment for estimating rRNA content. The value should
      be the rRNA reference fasta. If this flag is absent, rRNA estimation will be
      based upon the rRNA transcript intervals provided in the GTF (a faster but less
      robust method).
  # NOTE(review): typed File?, but the tool-level usage text lists
  # `-singleEnd` as a bare flag with no argument — confirm the type.
  singleEnd:
    type: File?
    inputBinding:
      position: 8
      prefix: -singleEnd
    doc: This BAM contains single end reads.
  e:
    type: string?
    inputBinding:
      position: 5
      prefix: -e
    doc: Change the definition of a transcripts end (5' or 3') to the given length.
      (50, 100, 200 are acceptable values; 200 is default)
  d:
    type: string?
    inputBinding:
      position: 4
      prefix: -d
    doc: Perform downsampling to the given number of reads.
  expr:
    type: File?
    inputBinding:
      position: 6
      prefix: -expr
    doc: Uses provided GCT file for expression values instead of on-the-fly RPKM calculation
  gld:
    type: boolean?
    inputBinding:
      position: 8
      prefix: -gld
    doc: 'Gap Length Distribution: if flag is present, the distribution of gap lengths
      will be plotted.'
  o:
    type: string?
    inputBinding:
      position: 8
      prefix: -o
    doc: Output directory (will be created if doesn't exist).
  n:
    type: int?
    inputBinding:
      position: 8
      prefix: -n
    doc: Number of top transcripts to use. Default is 1000.
  # Required. Passed as a string, not a File.
  s:
    type: string
    inputBinding:
      position: 8
      prefix: -s
    doc: 'Sample File: tab-delimited description of samples and their bams'
  # Required reference fasta. `secondaryFiles` patterns: a plain suffix is
  # appended to the file name; a leading `^` first strips one extension.
  # NOTE(review): `^.bam` / `^.bam.bai` presumably stage the sample BAM and its
  # index next to the reference — confirm this is intentional.
  r:
    type: File
    inputBinding:
      position: 8
      prefix: -r
    secondaryFiles:
    - .fai
    - ^.dict
    - ^.bam
    - ^.bam.bai
    doc: Reference Genome in fasta format.
  gcMargin:
    type: double?
    inputBinding:
      position: 8
      prefix: -gcMargin
    doc: Used in conjunction with '-strat gc' to specify the percent gc content to
      use as boundaries. E.g. .25 would set a lower cutoff of 25% and an upper cutoff
      of 75% (default is 0.375).
  # Required transcript annotation file.
  t:
    type: File
    inputBinding:
      position: 8
      prefix: -t
    doc: GTF File defining transcripts (must end in '.gtf').
# Tool outputs. The single output file is found by globbing the working
# directory for the name supplied in the `out` input.
outputs:
  out:
    type: File
    outputBinding:
      glob: $(inputs.out)
    # NOTE(review): this description mentions a sam/bam file, while the
    # tool-level doc describes quality-control metrics — confirm the wording.
    doc: Required output sam or bam file
# Executable invoked inside the container; input bindings are appended to it.
baseCommand:
- wrapper.sh
# Tool-level description with per-option usage text. Kept as a double-quoted
# scalar: a trailing `\` joins lines without inserting a space, and a leading
# `\ ` on the next line keeps the space that folding would otherwise drop.
doc: "Computes a series of quality control metrics for RNA-seq data.\n\nUsage:\n\n\
  -bwa <arg>\nPath to BWA, which should be set if it's not in your path and BWArRNA\
  \ is used.\n\n-BWArRNA <arg>\nUse an on the fly BWA alignment for estimating rRNA\
  \ content. The value should be the rRNA reference fasta. If this flag is absent,\
  \ rRNA estimation will be based upon the rRNA transcript intervals provided in the\
  \ GTF (a faster but less robust method).\n\n-corr <arg>\nGCT file for expression\
  \ correlation comparison. Note, that the values must be log normalized, and the\
  \ identifiers must match those of the GTF file.\n\n-d <arg>\nPerform downsampling\
  \ to the given number of reads.\n\n-e <arg>\nChange the definition of a transcripts\
  \ end (5' or 3') to the given length. (50, 100, 200 are acceptable values; 200 is\
  \ default)\n\n-expr <arg>\nUses provided GCT file for expression values instead\
  \ of on-the-fly RPKM calculation\n\n-gc <arg>\nFile of transcript id <tab> gc content.\
  \ Used for stratification.\n\n-n <arg>\nNumber of top transcripts to use. Default\
  \ is 1000.\n\n-noDoC\nSuppresses GATK Depth of Coverage calculations.\n\n-noReadCounting\n\
  Suppresses read count-based metrics.\n\n-o <arg>\nOutput directory (will be created\
  \ if doesn't exist).\n\n-r <arg>\nReference Genome in fasta format.\n\n-rRNA <arg>\n\
  intervalFile for rRNA loci (must end in .list). This is an alternative flag to the\
  \ -BWArRNA flag.\n\n-s <arg>\nSample File: tab-delimited description of samples\
  \ and their bams. This file header is:\nSample ID Bam File Notes\nWhen running\
  \ on just one sample, this argument can be a string of the form\n\"Sample ID|Bam\
  \ File|Notes\", where Bam File is the path to the input file.\n\n-singleEnd\nThis\
  \ BAM contains single end reads.\n\n-strat <arg>\nStratification options: current\
  \ supported option is 'gc'\n\n-strictMode <arg>\nWhen counting reads per exon or\
  \ generating RPKMs, reads will be filtered out that have a mapping quality of zero,\
  \ more than 6 non-reference bases or improper pairs.\n\n-t <arg>\nGTF File defining\
  \ transcripts (must end in '.gtf').\n\n-transcriptDetails\nProvide an HTML report\
  \ for each transcript.\n\n-ttype <arg>\nThe column in GTF to use to look for rRNA\
  \ transcript type. Mainly used for running on Ensembl GTF (specify \"-ttype 2\"\
  ). Otherwise, for spec-conforming GTF files, disregard.\n\n-rRNAdSampleTarget\n\
  Downsamples to calculate rRNA rate more efficiently. Default is 1 million. Set to\
  \ 0 to disable.\n\n-gcMargin\nUsed in conjunction with '-strat gc' to specify the\
  \ percent gc content to use as boundaries. E.g. .25 would set a lower cutoff of\
  \ 25% and an upper cutoff of 75% (default is 0.375).\n\n-gld\nGap Length Distribution:\
  \ if flag is present, the distribution of gap lengths will be plotted.\n\n-gatkFlags\n\
  Pass a string of quotes directly to the GATK (e.g. -gatkFlags \"-DBQ 0\" to set\
  \ missing base qualities to zero). \n"