Skip to content

Commit

Permalink
Merge pull request #56 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
release 0.10
  • Loading branch information
dozy committed Nov 20, 2014
2 parents 6b33edc + 9c63110 commit 4999234
Show file tree
Hide file tree
Showing 17 changed files with 761 additions and 346 deletions.
8 changes: 5 additions & 3 deletions bin/viv.pl
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@
if($to_node->{type} eq q[EXEC]) {
$data_xfer_name = _create_fifo($edge->{from});
}
elsif($to_node->{subtype} eq q[DUMMY]) {
elsif(defined $to_node->{subtype} and $to_node->{subtype} eq q[DUMMY]) {
$data_xfer_name = q[];
}
else {
Expand All @@ -126,7 +126,9 @@
# kick off any unblocked EXEC nodes, noting their details for later release of any dependants
my %pid2id = ();
for my $node_id (keys %exec_nodes) {
if($exec_nodes{$node_id}->{wait_counter} == 0 and not $exec_nodes{$node_id}->{pid}) { # green light - execute
my $wait_counter = $exec_nodes{$node_id}->{wait_counter};
$wait_counter ||= 0;
if($wait_counter == 0 and not $exec_nodes{$node_id}->{pid}) { # green light - execute

my $node = $exec_nodes{$node_id};
if((my $pid=_fork_off($node, $do_exec))) {
Expand Down Expand Up @@ -176,7 +178,7 @@
}else{

$logger->($VLMED, sprintf(q[Child %s (pid: %d), return_status: %#04X, wifexited: %d (%#04X), wexitstatus: %s], $completed_node->{id}, $pid, $status, $wifexited, $wexitstatus, $wexitstatus), "\n");
$logger->($VLMED, sprintf(q[Child %s (pid: %d), wifsignaled: %#04X, wtermsig: %s], $completed_node->{id}, $pid, $wifsignaled, $wtermsig), "\n");
$logger->($VLMED, sprintf(q[Child %s (pid: %d), wifsignaled: %#04X, wtermsig: %s], $completed_node->{id}, $pid, $wifsignaled, ($wifsignaled? $wtermsig: q{NA})), "\n");
$logger->($VLMED, sprintf(q[Child %s (pid: %d), wifexited: %#04X, wexitstatus: %s], $completed_node->{id}, $pid, $wifexited, $wexitstatus), "\n");

if($dependants_list and @$dependants_list) {
Expand Down
28 changes: 9 additions & 19 deletions bin/vtfp.pl
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ sub do_substitutions {
my $subst_value = make_substitutions($subst_param_name, $substitutable_params, \%subst_requests, $query_mode);

if($query_mode) {
print $out join(qq[\t], ($subst_param_name, ($substitutable_params->{$subst_param_name}->{required}? q[required]: q[not_required]), $substitutable_params->{$subst_param_name}->{parent_id}, $substitutable_params->{$subst_param_name,}->{attrib_name}, )), "\n";
print $out join(qq[\t], ($subst_param_name, ($substitutable_params->{$subst_param_name}->{required}? q[required]: q[not_required]), $substitutable_params->{$subst_param_name}->{parent_id}, $substitutable_params->{$subst_param_name}->{attrib_name}, )), "\n";
}
}

Expand Down Expand Up @@ -380,23 +380,20 @@ sub resolve_subst_to_string {

if(not defined $subst_value) {
# do a little unpacking for readability
my $attrib_name = $subst_param->{attrib_name};
my $elem_index = $subst_param->{elem_index};
$attrib_name ||= "element $elem_index";
my $subst_param_name = $subst_param->{param_name};
my $parent_id = $subst_param->{parent_id};
$parent_id ||= q[NO_PARENT_ID]; # should be ARRAY?

if($subst_param->{required} and not $query_mode) { # required means "must be specified by the caller", so default value is disregarded
# $logger->($VLFATAL, q[No substitution specified for required substitutable param (], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id, q[) - use -q for full list of substitutable parameters]);
# NOTE: the decision to fail can only be decided at the top level of the subst_param structure
$logger->($VLMIN, q[No substitution specified for required substitutable param ], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id);
$logger->($VLMIN, q[No substitution specified for required substitutable param ], $subst_param_name);
return;
}

$subst_value = $subst_param->{default_value};
if(not defined $subst_value) {
$logger->($VLMIN, q[No default value specified for apparent substitutable param (], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id, q[)]);
$logger->($VLMIN, q[No default value specified for apparent substitutable param ], $subst_param_name);
}
}

Expand All @@ -422,10 +419,8 @@ sub resolve_subst_array {
my ($subst_param, $subst_value, $query_mode) = @_;

if(ref $subst_value ne q[ARRAY]) {
$logger->($VLMIN, q[Attempt to substitute array for non-array in substitutable param (],
$subst_param->{param_name},
q[ for ], $subst_param->{attrib_name},
q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]), q[)]);
$logger->($VLMIN, q[Attempt to substitute array for non-array in substitutable param ],
$subst_param->{param_name});
return;
}

Expand All @@ -440,17 +435,12 @@ sub resolve_subst_array {
}
else {
if($subst_param->{required}) {
$logger->($VLFATAL, q[No substitution specified for required substitutable param (],
$subst_param->{param_name},
q[ for ], $subst_param->{attrib_name},
q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]),
q[) - use -q for full list of substitutable parameters]);
$logger->($VLFATAL, q[No substitution specified for required substitutable param ],
$subst_param->{param_name});
}
else {
$logger->($VLMIN, q[No default value specified for apparent substitutable param (],
$subst_param->{param_name},
q[ for ], $subst_param->{attrib_name},
q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]), q[)]);
$logger->($VLMIN, q[No default value specified for apparent substitutable param ],
$subst_param->{param_name});
return;
}
}
Expand Down
10 changes: 5 additions & 5 deletions data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@
{
"id":"prefilter",
"type":"EXEC",
"cmd":"/nfs/users/nfs_j/js10/teepot-1.0.1/teepot -m2M __PF1__ __PF2__"
"cmd":"teepot -m2M __PF1__ __PF2__"
},

{
Expand Down Expand Up @@ -250,13 +250,13 @@
{
"id":"tee_split",
"type":"EXEC",
"cmd":"mbuffer -f -q -m 5M -o __PIPE1__ -o __PIPE2__"
"cmd":"teepot -m 5M __FILTERED_BAM_OUT__ __SPLIT_BAM_OUT__"
},

{
"id":"tee_decode",
"type":"EXEC",
"cmd":"/nfs/users/nfs_j/js10/teepot-1.0.1/teepot -m2M __TD1__ __TD2__"
"cmd":"teepot -m2M __TD1__ __TD2__"
},

{
Expand Down Expand Up @@ -401,13 +401,13 @@

{
"id":"tee_to_filtered_bam",
"from":"tee_split:__PIPE1__",
"from":"tee_split:__FILTERED_BAM_OUT__",
"to":"filtered_bam"
},

{
"id":"tee_to_sort",
"from":"tee_split:__PIPE2__",
"from":"tee_split:__SPLIT_BAM_OUT__",
"to":"splitter"
}

Expand Down
14 changes: 8 additions & 6 deletions data/bwamem_wtsi_stage2_template.vtf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description":"Process DNA seq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc",
"description":"Process DNA seq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output",
"nodes":[
{
"id":"bmd_phix_multiway",
Expand Down Expand Up @@ -316,7 +316,7 @@
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": false,
"cmd":"teepot -w 300 -m 1G __OUT1__ __OUT2__"
"cmd":"teepot -w 300 -m 1G __PHIX_ALN_OUT__ __TGT_ALN_OUT__"
},
{
"id":"bamrecompress_input",
Expand Down Expand Up @@ -549,14 +549,15 @@
},
{
"id":"bammarkduplicates_phix",
"comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":{"subst_param_name":"bammarkduplicates_phix",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"},
{"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"},
" level=0 ",
"verbose=0 ",
"M=",
Expand Down Expand Up @@ -620,14 +621,15 @@
},
{
"id":"bammarkduplicates_target",
"comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":{"subst_param_name":"bammarkduplicates_target",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"},
{"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"},
" level=0 ",
"verbose=0 ",
"M=",
Expand Down Expand Up @@ -824,7 +826,7 @@
},
{
"id":"ti_to_brc",
"from":"tee_input:__OUT1__",
"from":"tee_input:__PHIX_ALN_OUT__",
"to":"bamrecompress_input"
},
{
Expand All @@ -834,7 +836,7 @@
},
{
"id":"ti_to_bamcollate2_ranking",
"from":"tee_input:__OUT2__",
"from":"tee_input:__TGT_ALN_OUT__",
"to":"bamcollate2_ranking"
},
{
Expand Down
8 changes: 4 additions & 4 deletions data/snap_sample_template.vtf
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@
{
"id":"tee_headerSQfix",
"type":"EXEC",
"cmd":"mbuffer -f -q -m 5M -o - -o __OUT1__",
"cmd":"teepot -m 5M __HEADER_FIX_OUT__ __FULL_BAM_OUT__",
"comment":"get deadlock when tee used here"
},
{
Expand All @@ -429,7 +429,7 @@
{
"id":"mbuffer_headerSQfix",
"type":"EXEC",
"cmd":"mbuffer -f -q -m 5M"
"cmd":"teepot -m 5M -"
},
{
"id":"reheader_headerSQfix",
Expand Down Expand Up @@ -785,7 +785,7 @@
},
{
"id":"tee_headerSQfix_to_sam",
"from":"tee_headerSQfix:__OUT1__",
"from":"tee_headerSQfix:__HEADER_FIX_OUT__",
"to":"sam_headerSQfix"
},
{
Expand All @@ -805,7 +805,7 @@
},
{
"id":"tee_headerSQfix_to_mbuffer",
"from":"tee_headerSQfix",
"from":"tee_headerSQfix:__FULL_BAM_OUT__",
"to":"mbuffer_headerSQfix"
},
{
Expand Down
26 changes: 14 additions & 12 deletions data/tophat2_wtsi_stage2_template.vtf
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"description":"Process RNASeq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc",
"description":"Process RNASeq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output",
"nodes":[
{
"id":"bmd_phix_multiway",
Expand Down Expand Up @@ -314,8 +314,8 @@
"id":"tee_input",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":"tee __FIFO__"
"use_STDOUT": false,
"cmd":"teepot -w 300 -m 1G __PHIX_ALN_OUT__ __TGT_ALN_OUT__"
},
{
"id":"bamrecompress_input",
Expand Down Expand Up @@ -651,8 +651,8 @@
"id":"tee_headerSQfix",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":"mbuffer -f -q -m 5M -o - -o __OUT1__",
"use_STDOUT": false,
"cmd":"teepot -m 5M __HEADER_FIX_OUT__ __FULL_BAM_OUT__",
"comment":"get deadlock when tee used here"
},
{
Expand All @@ -676,7 +676,7 @@
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":"mbuffer -f -q -m 5M"
"cmd":"teepot -m 5M -"
},
{
"id":"reheader_headerSQfix",
Expand Down Expand Up @@ -785,14 +785,15 @@
},
{
"id":"bammarkduplicates_phix",
"comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":{"subst_param_name":"bammarkduplicates_phix",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"},
{"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"},
" level=0 ",
"M=",
{"subst_param_name":"outdatadir","required":"no","default":"."},
Expand Down Expand Up @@ -855,14 +856,15 @@
},
{
"id":"bammarkduplicates_target",
"comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"cmd":{"subst_param_name":"bammarkduplicates_target",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"},
{"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"},
" level=0 ",
"M=",
{"subst_param_name":"outdatadir","required":"no","default":"."},
Expand Down Expand Up @@ -1058,7 +1060,7 @@
},
{
"id":"ti_to_brc",
"from":"tee_input",
"from":"tee_input:__PHIX_ALN_OUT__",
"to":"bamrecompress_input"
},
{
Expand All @@ -1068,7 +1070,7 @@
},
{
"id":"ti_to_bamcollate2_ranking",
"from":"tee_input:__FIFO__",
"from":"tee_input:__TGT_ALN_OUT__",
"to":"bamcollate2_ranking"
},
{
Expand Down Expand Up @@ -1133,7 +1135,7 @@
},
{
"id":"tee_headerSQfix_to_sam",
"from":"tee_headerSQfix:__OUT1__",
"from":"tee_headerSQfix:__HEADER_FIX_OUT__",
"to":"sam_headerSQfix"
},
{
Expand All @@ -1153,7 +1155,7 @@
},
{
"id":"tee_headerSQfix_to_mbuffer",
"from":"tee_headerSQfix",
"from":"tee_headerSQfix:__FULL_BAM_OUT__",
"to":"mbuffer_headerSQfix"
},
{
Expand Down
12 changes: 5 additions & 7 deletions data/vtlib/README.vtlib
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ $ vtfp.pl -l aln_vtf.log -o aln.json \
-keys alignment_method -vals <ALIGNMENT_TEMPLATE_JSON> \
-keys reposdir -vals <REFERENCE_REPOSITORY_ROOT> \
-keys alignment_reference_genome_name -vals <PATH_TO_ALIGNER_REFERENCE_FROM_ROOT> \
-keys picard_reference_dict_name -vals <PATH_TO_PICARD_DICT_TARGET_REFERENCE_FROM_ROOT> \
-keys picard_dict_name_phix -vals <PATH_TO_PICARD_DICT_PHIX_REFERENCE_FROM_ROOT> \
-keys reference_dict_name -vals <PATH_TO_PICARD_DICT_TARGET_REFERENCE_FROM_ROOT> \
-keys reference_genome_fasta_name -vals <PATH_TO_FASTA_REFERENCE_FROM_ROOT> \
-keys phix_reference_genome_fasta_name -vals <PATH_TO_FASTA_PHIX_REFERENCE_FROM_ROOT> \
-keys aligner_numthreads -vals <NUMBER_OF_THREADS_USED_BY_ALIGNER> \
Expand All @@ -28,22 +27,21 @@ $ viv.pl -x -s -o v 3 -o viv_run.log aln.json
alignment_method - the name of the template file containing the alignment steps (without the ".json" suffix)
repos_dir - root of the reference repository, useful for when the various reference formats appear in a common location
alignment_reference_genome_name - path from the repos_dir to the reference needed by the selected aligner
picard_reference_dict_name - picard reference_dict for target, used to reconstruct bam header SQ lines after alignment
picard_dict_name_phix - picard reference_dict for phix, used to reconstruct bam header SQ lines after alignment
reference_dict_name - picard reference_dict for target, used to reconstruct bam header SQ lines after alignment
reference_genome_fasta_name - path from reposdir to fasta reference for target, used by scramble to create cram files
phix_reference_genome_fasta_name - path from reposdir to fasta reference for phix, used by scramble to create cram files
aligner_numthreads - number of threads used bny the aligner, for aligners which have this feature


Examples:
bwa aln:
$ vtfp.pl -l was2_bwa_aln.vtf.log -o was2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json
$ vtfp.pl -l aws2_bwa_aln.vtf.log -o aws2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json

bwa mem:
$ vtfp.pl -l was2_bwa_mem.vtf.log -o was2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json
$ vtfp.pl -l aws2_bwa_mem.vtf.log -o aws2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json

tophat2:
$ vtfp.pl -l was2_tophat2.vtf.log -o was2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json
$ vtfp.pl -l aws2_tophat2.vtf.log -o aws2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json



Loading

0 comments on commit 4999234

Please sign in to comment.