Skip to content

Commit

Permalink
Merge pull request #195 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
prep for release 0.19.0
  • Loading branch information
dozy authored Nov 29, 2017
2 parents 814878f + 3ae3a33 commit b04b88a
Show file tree
Hide file tree
Showing 35 changed files with 5,453 additions and 237 deletions.
3 changes: 3 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
CHANGES LOG
-----------

- add new templates for STAR alignment and Salmon
- small change in final_output_prep: bamsort_cmd now takes a configurable executable (bsc_executable, defaulting to bamsormadup) and scramble takes an optional embed-reference flag (-e, selected via scramble_embed_reference)

release 0.18.6
- Y-split fixes
When processing VTFILE nodes, make sure that parameters are reevaluated locally instead of naively inheriting values
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ data/vtlib/pre_alignment.json
data/vtlib/pre_alignment_realign.json
data/vtlib/README.vtlib
data/vtlib/realignment_wtsi_template.json
data/vtlib/salmon_alignment.json
data/vtlib/seqchksum.json
data/vtlib/seqchksum_hs.json
data/vtlib/seqchksum_realign.json
data/vtlib/split_by_chromosome.json
data/vtlib/star_alignment.json
data/vtlib/tophat2_alignment.json
examples/bwa_aln_cfg.png
examples/bwa_mem/bwa_mem_alignment.vtf
Expand Down
337 changes: 283 additions & 54 deletions bin/viv.pl

Large diffs are not rendered by default.

589 changes: 468 additions & 121 deletions bin/vtfp.pl

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion data/vtlib/bamindexdecoder.json
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
},
{
"id":"decoder_metrics",
"type":"OUTFILE",
"type":"RAFILE",
"name":{"subst":"decoder_metrics"}
},
{
Expand Down
2 changes: 1 addition & 1 deletion data/vtlib/bcl2bam_phix_deplex_wtsi_stage1_template.json
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@
"name":{"subst":"seqchksum","required":"yes","ifnull":{"subst_constructor":{ "vals":[ {"subst":"cfgdatadir"}, "/", {"subst":"fs1p","required":"yes", "ifnull": "final_stage1_process.json"} ], "postproc":{"op":"concat", "pad":""} }}},
"description":"subgraph containing final stage1 processing"
},
{ "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} },
{ "id":"seqchksum_file", "type":"RAFILE", "name":{"subst":"seqchksum_file"} },
{
"id":"filtered_bam",
"type":"OUTFILE",
Expand Down
10 changes: 10 additions & 0 deletions data/vtlib/bwa_aln_alignment.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
{
"id":"tee4",
"type":"EXEC",
"use_STDIN":true,
"use_STDOUT":false,
"cmd":[
"teepot",
{"subst":"teepot_vflag", "ifnull":"-v"},
Expand All @@ -39,11 +41,15 @@
{
"id":"bwa_aln_1",
"type":"EXEC",
"use_STDIN":false,
"use_STDOUT":true,
"cmd":[ {"subst":"bwa_executable"}, "aln", "-t", {"subst":"aligner_numthreads"}, "-b1", "__REFERENCE_GENOME_FASTA_IN__", "__BAM_IN__" ]
},
{
"id":"bwa_aln_2",
"type":"EXEC",
"use_STDIN":false,
"use_STDOUT":true,
"cmd":[ {"subst":"bwa_executable"}, "aln", "-t", {"subst":"aligner_numthreads"}, "-b2", "__REFERENCE_GENOME_FASTA_IN__", "__BAM_IN__" ]
},
{
Expand All @@ -63,11 +69,15 @@
{
"id":"bwa_sampe",
"type":"EXEC",
"use_STDIN":false,
"use_STDOUT":true,
"cmd":[ {"subst":"bwa_executable"}, "sampe", "__REFERENCE_GENOME_FASTA_IN__", "__SAI_1_IN__", "__SAI_2_IN__", "__BAM_1_IN__", "__BAM_2_IN__" ]
},
{
"id":"samtobam",
"type":"EXEC",
"use_STDIN":true,
"use_STDOUT":true,
"cmd":[
"scramble",
{"subst":"s2b_compress_level", "ifnull":"-0"},
Expand Down
6 changes: 6 additions & 0 deletions data/vtlib/bwa_mem_alignment.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,23 @@
{
"id":"bamtofastq",
"type":"EXEC",
"use_STDIN":true,
"use_STDOUT":true,
"cmd":["bamtofastq"]
},
{
"id":"bwa_mem",
"comment":"presuming interleaved FR fastq records (-p flag), output all records (-T 0)",
"type":"EXEC",
"use_STDIN":false,
"use_STDOUT":true,
"cmd":[ {"subst":"bwa_executable"}, "mem", "-t", {"subst":"aligner_numthreads"}, {"subst":"bwa_mem_p_flag"},{"subst":"bwa_mem_Y_flag"}, {"subst":"bwa_mem_T_flag"}, {"subst":"bwa_mem_K_flag"}, "__DB_PREFIX_REFERENCE_GENOME_IN__", "__FQ_IN__" ]
},
{
"id":"samtobam",
"type":"EXEC",
"use_STDIN":true,
"use_STDOUT":true,
"cmd":[
"scramble",
{"subst":"s2b_mt", "ifnull":{"subst_constructor":{ "vals":[ "-t", {"subst":"s2b_mt_val"} ]}}},
Expand Down
13 changes: 10 additions & 3 deletions data/vtlib/final_output_prep.json
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd": [ "bamsormadup", {"subst":"bsmd_threads"}, "SO=coordinate", "level=0", "verbose=0", "fixmate=1", "adddupmarksupport=1", {"subst":"bs_tmpfile_flag"} ]
"cmd": [ {"subst":"bsc_executable", "required":"yes", "ifnull":"bamsormadup"}, {"subst":"bsmd_threads"}, "SO=coordinate", "level=0", "verbose=0", "fixmate=1", "adddupmarksupport=1", {"subst":"bs_tmpfile_flag"} ]
},
{
"id":"bammarkduplicates",
Expand Down Expand Up @@ -268,7 +268,14 @@
{"subst":"b2c_compress_level", "ifnull":"-7"},
"-I", "bam",
"-O", "cram",
{"subst":"scramble_reference_flag"} ]
{"subst":"scramble_reference_flag"},
{"select":"scramble_embed_reference","default":0,"select_range":[0,1],
"cases":[
[],
"-e"
]
}
]
},
{
"id":"scramble_tee",
Expand Down Expand Up @@ -376,7 +383,7 @@
{ "id":"bam_file", "type":"OUTFILE", "name":{"subst":"bam_file"} },
{ "id":"cram_file", "type":"OUTFILE", "name":{"subst":"cram_file"} },
{ "id":"cram_md5", "type":"OUTFILE", "name":{"subst":"cram_md5"} },
{ "id":"seqchksum_file", "type":"OUTFILE", "name":{"subst":"seqchksum_file"} },
{ "id":"seqchksum_file", "type":"RAFILE", "name":{"subst":"seqchksum_file"} },
{ "id":"seqchksum_file_cram", "type":"RAFILE", "name":{"subst":"seqchksum_file_cram"}, "comment":"this file is a temporary fix for blocking problems at the cmp_seqchksum node" },
{ "id":"seqchksum_extrahash_file", "type":"OUTFILE", "name":{"subst":"seqchksum_extrahash_file"} },
{ "id":"stats_F0x900_file", "type":"OUTFILE", "name":{"subst":"stats_F0x900_file"} },
Expand Down
130 changes: 130 additions & 0 deletions data/vtlib/salmon_alignment.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"version":"1.0",
"description":"run Salmon quantification (salmon quant) on paired fastq inputs, then zip the resulting quantification and metadata files into a single archive",
"subgraph_io":{
"ports":{
"inputs":{
"fastq1":"salmon:__FQ1_IN__",
"fastq2":"salmon:__FQ2_IN__"
}
}
},
"subst_params":[
{
"id":"salmon_dir",
"required":"no",
"default":"salmon_quant"
},
{
"id":"salmon_out",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"} ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"quant",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"}, "/quant.sf" ],
"postproc":{"op":"concat","pad":""}
},
"default":"salmon_quant/quant.sf"
},
{
"id":"quant_genes",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"}, "/quant.genes.sf" ],
"postproc":{"op":"concat","pad":""}
},
"default":"salmon_quant/quant.genes.sf"
},
{
"id":"lib_format_counts",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"}, "/lib_format_counts.json" ],
"postproc":{"op":"concat","pad":""}
},
"default":"salmon_quant/lib_format_counts.json"
},
{
"id":"libparams",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"}, "/libParams" ],
"postproc":{"op":"concat","pad":""}
},
"default":"salmon_quant/libParams"
},
{
"id":"cmd_info",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"salmon_dir"}, "_", {"subst":"rpt"}, "/cmd_info.json" ],
"postproc":{"op":"concat","pad":""}
},
"default":"salmon_quant/cmd_info.json"
},
{
"id":"zip_target",
"required":"no",
"subst_constructor":{
"vals":[ {"subst":"outdatadir"}, "/", {"subst":"rpt"}, ".salmon_quant.zip" ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"gene_mapping_flag",
"required":"no",
"subst_constructor":{
"vals":[ "--geneMap=", {"subst":"annotation_val"} ],
"postproc":{"op":"concat","pad":""}
}
},
{
"id":"salmon_transcriptome_val",
"required":"yes"
}
],
"nodes":[
{
"id":"salmon",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"cmd":[
"salmon",
"--no-version-check",
"quant",
"--index", {"subst":"salmon_transcriptome_val"},
"--libType", "A",
"--mates1", "__FQ1_IN__",
"--mates2", "__FQ2_IN__",
{"subst":"gene_mapping_flag"},
{"subst":"b2c_mt", "ifnull":{"subst_constructor":{ "vals":[ "-p", {"subst":"b2c_mt_val"} ]}}},
"--output", {"subst":"salmon_out"}
]
},
{
"id":"zip_salmon_quant",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": false,
"cmd":[
"zip", "-r",
{"subst":"zip_target"},
{"subst":"quant"},
{"subst":"quant_genes"},
{"subst":"lib_format_counts"},
{"subst":"libparams"},
{"subst":"cmd_info"}
]
}
],
"edges":[
{ "id":"salmon_to_zip_salmon_quant", "from":"salmon", "to":"zip_salmon_quant"}
]
}
Loading

0 comments on commit b04b88a

Please sign in to comment.