Skip to content

Commit

Permalink
Merge branch 'rube14-rb11_add_star_alignment' into devel
Browse files Browse the repository at this point in the history
  • Loading branch information
dozy committed Nov 21, 2017
2 parents 627dbb9 + 741835d commit 3ae3a33
Show file tree
Hide file tree
Showing 3 changed files with 273 additions and 0 deletions.
1 change: 1 addition & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
CHANGES LOG
-----------

- add new templates for STAR alignment and Salmon
- Small change in final_output_prep to allow bamsort_cmd to have configurable executable and scramble to have optional embed reference param.

release 0.18.6
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ data/vtlib/pre_alignment.json
data/vtlib/pre_alignment_realign.json
data/vtlib/README.vtlib
data/vtlib/realignment_wtsi_template.json
data/vtlib/salmon_alignment.json
data/vtlib/seqchksum.json
data/vtlib/seqchksum_hs.json
data/vtlib/seqchksum_realign.json
data/vtlib/split_by_chromosome.json
data/vtlib/star_alignment.json
data/vtlib/tophat2_alignment.json
examples/bwa_aln_cfg.png
examples/bwa_mem/bwa_mem_alignment.vtf
Expand Down
270 changes: 270 additions & 0 deletions data/vtlib/star_alignment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
{
"description":"run star to to align input bam to supplied reference genome",
"version":"1.0",
"subgraph_io":{
"ports":{
"inputs":{
"_stdin_":"bamtofastq",
"reference":"star:__REFERENCE_GENOME_IN__"
},
"outputs":{
"_stdout_":"star"
}
}
},
"subst_params":[
{
"id": "basic_pipeline_params",
"type":"SPFILE",
"name":{"subst":"basic_pipeline_params_file"},
"required": "no",
"comment":"this will expand to a set of subst_param elements"
},
{
"id":"fastq1_name",
"required":"no",
"default":"intfile_1.fq.gz",
"subst_constructor":{
"vals":[ "intfile_1_", {"subst":"rpt"}, ".fq.gz" ],
"postproc":{"op":"concat", "pad":""}
}
},
{
"id":"fastq1",
"required":"yes",
"subst_constructor":{
"vals":[ {"subst":"tmpdir"}, "/", {"subst":"fastq1_name"} ],
"postproc":{"op":"concat", "pad":""}
}
},
{
"id":"fastq2_name",
"required":"no",
"default":"intfile_2.fq.gz",
"subst_constructor":{
"vals":[ "intfile_2_", {"subst":"rpt"}, ".fq.gz" ],
"postproc":{"op":"concat", "pad":""}
}
},
{
"id":"fastq2",
"required":"yes",
"subst_constructor":{
"vals":[ {"subst":"tmpdir"}, "/", {"subst":"fastq2_name"} ],
"postproc":{"op":"concat", "pad":""}
}
},
{
"id":"star_dir","required":"no","default":"."
},
{
"id":"star_out",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"star_dir"}, "/", "_", {"subst":"rpt"}, "_" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"annotation_val",
"subst_constructor":{
"vals":[ {"subst":"reposdir"}, "/transcriptomes/", {"subst":"transcriptome_subpath"} ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"sjdb_annotation_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--sjdbGTFfile", {"subst":"annotation_val"} ],
"postproc":{"op":"concat","pad":" "}
}
},
{
"id":"aligner_numthreads_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--runThreadN", {"subst":"aligner_numthreads"} ],
"postproc":{"op":"concat","pad":" "}
}
},
{
"id":"sjdb_overhang_val",
"required":"no",
"default":"99"
},
{
"id":"sjdb_overhang_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--sjdbOverhang", {"subst":"sjdb_overhang_val"} ],
"postproc":{"op":"concat","pad":" "}
}
},
{
"id":"chimSegmentMin_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--chimSegmentMin", {
"subst":"chimSegmentMin_val",
"ifnull":"0",
"comment":"unset this value to remove --chimSegmentMin flag"
}
],
"postproc":{"op":"concat","pad":" "}
}
},
{
"id":"chimJunctionOverhangMin_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--chimJunctionOverhangMin", {
"subst":"chimJunctionOverhangMin_val",
"ifnull":"20",
"comment":"unset this value to remove --chimJunctionOverhangMin flag"
}
],
"postproc":{"op":"concat","pad":" "}
}
},
{
"id":"junctions_tab",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"star_out"}, "SJ.out.tab" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"readspergene_tab",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"star_out"}, "ReadsPerGene.out.tab" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"cp_junctions_tab_target",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".junctions.tab" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"cp_readspergene_tab_target",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".readspergene.tab" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"star_executable",
"required":"no",
"default":"STAR"
},
{
"id":"quant_vtf",
"required":"yes",
"subst_constructor":{
"vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"quant_method"}, "_alignment.json" ],
"postproc":{"op":"concat", "pad":""}
}
}
],
"nodes":[
{
"id":"bamtofastq",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": false,
"cmd":["bamtofastq", "gz=0", "F=__FQ1_OUT__", "F2=__FQ2_OUT__"]
},
{
"id":"fq1",
"type":"RAFILE",
"name":{"subst":"fastq1"}
},
{
"id":"fq2",
"type":"RAFILE",
"name":{"subst":"fastq2"}
},
{
"id":"star",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"cmd": [
{"subst":"star_executable"},
"--runMode", "alignReads",
"--outFileNamePrefix", {"subst":"star_out"},
{"subst":"aligner_numthreads_flag"},
"--genomeLoad", "NoSharedMemory",
{"subst":"sjdb_annotation_flag"},
{"subst":"sjdb_overhang_flag"},
"--outSAMstrandField", "intronMotif",
"--outSAMattributes", "NH", "HI", "NM", "MD", "AS", "XS",
"--outSAMunmapped", "Within", "KeepPairs",
"--outSAMtype", "BAM", "Unsorted",
"--outFilterIntronMotifs", "RemoveNoncanonicalUnannotated",
"--chimOutType", "WithinBAM",
{"subst":"chimSegmentMin_flag"},
{"subst":"chimJunctionOverhangMin_flag"},
"--quantMode", "GeneCounts",
"--genomeDir", "__REFERENCE_GENOME_IN__",
"--readFilesIn", "__FQ1_IN__", "__FQ2_IN__",
"--outStd", "BAM_Unsorted"
]
},
{
"id":"junctions_tab",
"type":"RAFILE",
"subtype":"DUMMY",
"name":{"subst":"junctions_tab"}
},
{
"id":"readspergene_tab",
"type":"RAFILE",
"subtype":"DUMMY",
"name":{"subst":"readspergene_tab"}
},
{
"id":"cp_junctions_tab",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": false,
"cmd":[ "cp", "__SRC_JUNCTIONS_TAB_IN__", {"subst":"cp_junctions_tab_target"} ]
},
{
"id":"cp_readspergene_tab",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": false,
"cmd":[ "cp", "__SRC_READSPERGENE_TAB_IN__", {"subst":"cp_readspergene_tab_target"} ]
},
{
"id":"quantify",
"type":"VTFILE",
"use_STDIN": false,
"use_STDOUT": true,
"comment":"inputs: fq1, fq2; outputs: NONE",
"node_prefix":"quant_",
"name":{"subst":"quant_vtf"},
"description":"subgraph containing salmon quantification of transcripts"
}
],
"edges":[
{ "id":"bamtofastq_to_fq1", "from":"bamtofastq:__FQ1_OUT__", "to":"fq1" },
{ "id":"bamtofastq_to_fq2", "from":"bamtofastq:__FQ2_OUT__", "to":"fq2" },
{ "id":"fq1_to_star", "from":"fq1", "to":"star:__FQ1_IN__" },
{ "id":"fq2_to_star", "from":"fq2", "to":"star:__FQ2_IN__" },
{ "id":"star_to_junctions_tab", "from":"star", "to":"junctions_tab" },
{ "id":"cp_junctions_tab", "from":"junctions_tab", "to":"cp_junctions_tab:__SRC_JUNCTIONS_TAB_IN__" },
{ "id":"star_to_readspergene_tab", "from":"star", "to":"readspergene_tab" },
{ "id":"cp_readspergene_tab", "from":"readspergene_tab", "to":"cp_readspergene_tab:__SRC_READSPERGENE_TAB_IN__" },
{ "id":"fq1_to_quantify", "from":"fq1", "to":"quantify:fastq1" },
{ "id":"fq2_to_quantify", "from":"fq2", "to":"quantify:fastq2" }
]
}

0 comments on commit 3ae3a33

Please sign in to comment.