diff --git a/bin/viv.pl b/bin/viv.pl index 0e088a905..efb325afc 100755 --- a/bin/viv.pl +++ b/bin/viv.pl @@ -99,7 +99,7 @@ if($to_node->{type} eq q[EXEC]) { $data_xfer_name = _create_fifo($edge->{from}); } - elsif($to_node->{subtype} eq q[DUMMY]) { + elsif(defined $to_node->{subtype} and $to_node->{subtype} eq q[DUMMY]) { $data_xfer_name = q[]; } else { @@ -126,7 +126,9 @@ # kick off any unblocked EXEC nodes, noting their details for later release of any dependants my %pid2id = (); for my $node_id (keys %exec_nodes) { - if($exec_nodes{$node_id}->{wait_counter} == 0 and not $exec_nodes{$node_id}->{pid}) { # green light - execute + my $wait_counter = $exec_nodes{$node_id}->{wait_counter}; + $wait_counter ||= 0; + if($wait_counter == 0 and not $exec_nodes{$node_id}->{pid}) { # green light - execute my $node = $exec_nodes{$node_id}; if((my $pid=_fork_off($node, $do_exec))) { @@ -176,7 +178,7 @@ }else{ $logger->($VLMED, sprintf(q[Child %s (pid: %d), return_status: %#04X, wifexited: %d (%#04X), wexitstatus: %s], $completed_node->{id}, $pid, $status, $wifexited, $wexitstatus, $wexitstatus), "\n"); - $logger->($VLMED, sprintf(q[Child %s (pid: %d), wifsignaled: %#04X, wtermsig: %s], $completed_node->{id}, $pid, $wifsignaled, $wtermsig), "\n"); + $logger->($VLMED, sprintf(q[Child %s (pid: %d), wifsignaled: %#04X, wtermsig: %s], $completed_node->{id}, $pid, $wifsignaled, ($wifsignaled? $wtermsig: q{NA})), "\n"); $logger->($VLMED, sprintf(q[Child %s (pid: %d), wifexited: %#04X, wexitstatus: %s], $completed_node->{id}, $pid, $wifexited, $wexitstatus), "\n"); if($dependants_list and @$dependants_list) { diff --git a/bin/vtfp.pl b/bin/vtfp.pl index 14a470d78..f16893ea1 100755 --- a/bin/vtfp.pl +++ b/bin/vtfp.pl @@ -281,7 +281,7 @@ sub do_substitutions { my $subst_value = make_substitutions($subst_param_name, $substitutable_params, \%subst_requests, $query_mode); if($query_mode) { - print $out join(qq[\t], ($subst_param_name, ($substitutable_params->{$subst_param_name}->{required}? q[required]: q[not_required]), $substitutable_params->{$subst_param_name}->{parent_id}, $substitutable_params->{$subst_param_name,}->{attrib_name}, )), "\n"; + print $out join(qq[\t], ($subst_param_name, ($substitutable_params->{$subst_param_name}->{required}? q[required]: q[not_required]), $substitutable_params->{$subst_param_name}->{parent_id}, $substitutable_params->{$subst_param_name}->{attrib_name}, )), "\n"; } } @@ -380,9 +380,6 @@ sub resolve_subst_to_string { if(not defined $subst_value) { # do a little unpacking for readability - my $attrib_name = $subst_param->{attrib_name}; - my $elem_index = $subst_param->{elem_index}; - $attrib_name ||= "element $elem_index"; my $subst_param_name = $subst_param->{param_name}; my $parent_id = $subst_param->{parent_id}; $parent_id ||= q[NO_PARENT_ID]; # should be ARRAY? @@ -390,13 +387,13 @@ sub resolve_subst_to_string { if($subst_param->{required} and not $query_mode) { # required means "must be specified by the caller", so default value is disregarded # $logger->($VLFATAL, q[No substitution specified for required substitutable param (], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id, q[) - use -q for full list of substitutable parameters]); # NOTE: the decision to fail can only be decided at the top level of the subst_param structure - $logger->($VLMIN, q[No substitution specified for required substitutable param ], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id); + $logger->($VLMIN, q[No substitution specified for required substitutable param ], $subst_param_name); return; } $subst_value = $subst_param->{default_value}; if(not defined $subst_value) { - $logger->($VLMIN, q[No default value specified for apparent substitutable param (], $subst_param_name, q[ for ], $attrib_name, q[ in ], $parent_id, q[)]); + $logger->($VLMIN, q[No default value specified for apparent substitutable param ], $subst_param_name); } } @@ -422,10 +419,8 @@ sub resolve_subst_array { my ($subst_param, $subst_value, $query_mode) = @_; if(ref $subst_value ne q[ARRAY]) { - $logger->($VLMIN, q[Attempt to substitute array for non-array in substitutable param (], - $subst_param->{param_name}, - q[ for ], $subst_param->{attrib_name}, - q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]), q[)]); + $logger->($VLMIN, q[Attempt to substitute array for non-array in substitutable param ], + $subst_param->{param_name}); return; } @@ -440,17 +435,12 @@ sub resolve_subst_array { } else { if($subst_param->{required}) { - $logger->($VLFATAL, q[No substitution specified for required substitutable param (], - $subst_param->{param_name}, - q[ for ], $subst_param->{attrib_name}, - q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]), - q[) - use -q for full list of substitutable parameters]); + $logger->($VLFATAL, q[No substitution specified for required substitutable param ], + $subst_param->{param_name}); } else { - $logger->($VLMIN, q[No default value specified for apparent substitutable param (], - $subst_param->{param_name}, - q[ for ], $subst_param->{attrib_name}, - q[ in ], ($subst_param->{parent_id}? $subst_param->{parent_id}: q[UNNAMED_PARENT]), q[)]); + $logger->($VLMIN, q[No default value specified for apparent substitutable param ], + $subst_param->{param_name}); return; } } diff --git a/data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf b/data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf index a68268065..2303cad65 100644 --- a/data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf +++ b/data/bcl2bam_phix_deplex_wtsi_stage1_template.vtf @@ -218,7 +218,7 @@ { "id":"prefilter", "type":"EXEC", - "cmd":"/nfs/users/nfs_j/js10/teepot-1.0.1/teepot -m2M __PF1__ __PF2__" + "cmd":"teepot -m2M __PF1__ __PF2__" }, { @@ -250,13 +250,13 @@ { "id":"tee_split", "type":"EXEC", - "cmd":"mbuffer -f -q -m 5M -o __PIPE1__ -o __PIPE2__" + "cmd":"teepot -m 5M __FILTERED_BAM_OUT__ __SPLIT_BAM_OUT__" }, { "id":"tee_decode", "type":"EXEC", - "cmd":"/nfs/users/nfs_j/js10/teepot-1.0.1/teepot -m2M __TD1__ __TD2__" + "cmd":"teepot -m2M __TD1__ __TD2__" }, { @@ -401,13 +401,13 @@ { "id":"tee_to_filtered_bam", - "from":"tee_split:__PIPE1__", + "from":"tee_split:__FILTERED_BAM_OUT__", "to":"filtered_bam" }, { "id":"tee_to_sort", - "from":"tee_split:__PIPE2__", + "from":"tee_split:__SPLIT_BAM_OUT__", "to":"splitter" } diff --git a/data/bwamem_wtsi_stage2_template.vtf b/data/bwamem_wtsi_stage2_template.vtf index 47f440b8b..8aa64bdd4 100644 --- a/data/bwamem_wtsi_stage2_template.vtf +++ b/data/bwamem_wtsi_stage2_template.vtf @@ -1,5 +1,5 @@ { -"description":"Process DNA seq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc", +"description":"Process DNA seq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output", "nodes":[ { "id":"bmd_phix_multiway", @@ -316,7 +316,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": false, - "cmd":"teepot -w 300 -m 1G __OUT1__ __OUT2__" + "cmd":"teepot -w 300 -m 1G __PHIX_ALN_OUT__ __TGT_ALN_OUT__" }, { "id":"bamrecompress_input", @@ -549,6 +549,7 @@ }, { "id":"bammarkduplicates_phix", + "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, @@ -556,7 +557,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - {"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"}, + {"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, " level=0 ", "verbose=0 ", "M=", @@ -620,6 +621,7 @@ }, { "id":"bammarkduplicates_target", + "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, @@ -627,7 +629,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - {"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"}, + {"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, " level=0 ", "verbose=0 ", "M=", @@ -824,7 +826,7 @@ }, { "id":"ti_to_brc", - "from":"tee_input:__OUT1__", + "from":"tee_input:__PHIX_ALN_OUT__", "to":"bamrecompress_input" }, { @@ -834,7 +836,7 @@ }, { "id":"ti_to_bamcollate2_ranking", - "from":"tee_input:__OUT2__", + "from":"tee_input:__TGT_ALN_OUT__", "to":"bamcollate2_ranking" }, { diff --git a/data/snap_sample_template.vtf b/data/snap_sample_template.vtf index 14c4fab35..7f3c57cc9 100644 --- a/data/snap_sample_template.vtf +++ b/data/snap_sample_template.vtf @@ -411,7 +411,7 @@ { "id":"tee_headerSQfix", "type":"EXEC", - "cmd":"mbuffer -f -q -m 5M -o - -o __OUT1__", + "cmd":"teepot -m 5M __HEADER_FIX_OUT__ __FULL_BAM_OUT__", "comment":"get deadlock when tee used here" }, { @@ -429,7 +429,7 @@ { "id":"mbuffer_headerSQfix", "type":"EXEC", - "cmd":"mbuffer -f -q -m 5M" + "cmd":"teepot -m 5M -" }, { "id":"reheader_headerSQfix", @@ -785,7 +785,7 @@ }, { "id":"tee_headerSQfix_to_sam", - "from":"tee_headerSQfix:__OUT1__", + "from":"tee_headerSQfix:__HEADER_FIX_OUT__", "to":"sam_headerSQfix" }, { @@ -805,7 +805,7 @@ }, { "id":"tee_headerSQfix_to_mbuffer", - "from":"tee_headerSQfix", + "from":"tee_headerSQfix:__FULL_BAM_OUT__", "to":"mbuffer_headerSQfix" }, { diff --git a/data/tophat2_wtsi_stage2_template.vtf b/data/tophat2_wtsi_stage2_template.vtf index aa0f2dca8..c1acbae3e 100644 --- a/data/tophat2_wtsi_stage2_template.vtf +++ b/data/tophat2_wtsi_stage2_template.vtf @@ -1,5 +1,5 @@ { -"description":"Process RNASeq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc", +"description":"Process RNASeq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output", "nodes":[ { "id":"bmd_phix_multiway", @@ -314,8 +314,8 @@ "id":"tee_input", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"tee __FIFO__" + "use_STDOUT": false, + "cmd":"teepot -w 300 -m 1G __PHIX_ALN_OUT__ __TGT_ALN_OUT__" }, { "id":"bamrecompress_input", @@ -651,8 +651,8 @@ "id":"tee_headerSQfix", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"mbuffer -f -q -m 5M -o - -o __OUT1__", + "use_STDOUT": false, + "cmd":"teepot -m 5M __HEADER_FIX_OUT__ __FULL_BAM_OUT__", "comment":"get deadlock when tee used here" }, { @@ -676,7 +676,7 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"mbuffer -f -q -m 5M" + "cmd":"teepot -m 5M -" }, { "id":"reheader_headerSQfix", @@ -785,6 +785,7 @@ }, { "id":"bammarkduplicates_phix", + "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, @@ -792,7 +793,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - {"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"}, + {"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, " level=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, @@ -855,6 +856,7 @@ }, { "id":"bammarkduplicates_target", + "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, @@ -862,7 +864,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - {"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"}, + {"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, " level=0 ", "M=", {"subst_param_name":"outdatadir","required":"no","default":"."}, @@ -1058,7 +1060,7 @@ }, { "id":"ti_to_brc", - "from":"tee_input", + "from":"tee_input:__PHIX_ALN_OUT__", "to":"bamrecompress_input" }, { @@ -1068,7 +1070,7 @@ }, { "id":"ti_to_bamcollate2_ranking", - "from":"tee_input:__FIFO__", + "from":"tee_input:__TGT_ALN_OUT__", "to":"bamcollate2_ranking" }, { @@ -1133,7 +1135,7 @@ }, { "id":"tee_headerSQfix_to_sam", - "from":"tee_headerSQfix:__OUT1__", + "from":"tee_headerSQfix:__HEADER_FIX_OUT__", "to":"sam_headerSQfix" }, { @@ -1153,7 +1155,7 @@ }, { "id":"tee_headerSQfix_to_mbuffer", - "from":"tee_headerSQfix", + "from":"tee_headerSQfix:__FULL_BAM_OUT__", "to":"mbuffer_headerSQfix" }, { diff --git a/data/vtlib/README.vtlib b/data/vtlib/README.vtlib index 9c62b00d4..9d9e3e446 100644 --- a/data/vtlib/README.vtlib +++ b/data/vtlib/README.vtlib @@ -10,8 +10,7 @@ $ vtfp.pl -l aln_vtf.log -o aln.json \ -keys alignment_method -vals \ -keys reposdir -vals \ -keys alignment_reference_genome_name -vals \ --keys picard_reference_dict_name -vals \ --keys picard_dict_name_phix -vals \ +-keys reference_dict_name -vals \ -keys reference_genome_fasta_name -vals \ -keys phix_reference_genome_fasta_name -vals \ -keys aligner_numthreads -vals \ @@ -28,8 +27,7 @@ $ viv.pl -x -s -o v 3 -o viv_run.log aln.json alignment_method - the name of the template file containing the alignment steps (without the ".json" suffix) repos_dir - root of the reference repository, useful for when the various reference formats appear in a common location alignment_reference_genome_name - path from the repos_dir to the reference needed by the selected aligner - picard_reference_dict_name - picard reference_dict for target, used to reconstruct bam header SQ lines after alignment - picard_dict_name_phix - picard reference_dict for phix, used to reconstruct bam header SQ lines after alignment + reference_dict_name - picard reference_dict for target, used to reconstruct bam header SQ lines after alignment reference_genome_fasta_name - path from reposdir to fasta reference for target, used by scramble to create cram files phix_reference_genome_fasta_name - path from reposdir to fasta reference for phix, used by scramble to create cram files aligner_numthreads - number of threads used bny the aligner, for aligners which have this feature @@ -37,13 +35,13 @@ $ viv.pl -x -s -o v 3 -o viv_run.log aln.json Examples: bwa aln: -$ vtfp.pl -l was2_bwa_aln.vtf.log -o was2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json +$ vtfp.pl -l aws2_bwa_aln.vtf.log -o aws2_bwa_aln.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_aln -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_aln -keys reposdir -vals /paths/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json bwa mem: -$ vtfp.pl -l was2_bwa_mem.vtf.log -o was2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json +$ vtfp.pl -l aws2_bwa_mem.vtf.log -o aws2_bwa_mem.json -keys indatadir -vals indata -keys outdatadir -vals outdata_bwa_mem -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals bwa_mem -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bwa0_6/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json tophat2: -$ vtfp.pl -l was2_tophat2.vtf.log -o was2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys picard_reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys picard_dict_name_phix -vals PhiX/default/all/picard/phix_unsnipped_short_no_N.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json +$ vtfp.pl -l aws2_tophat2.vtf.log -o aws2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json diff --git a/data/vtlib/alignment_wtsi_stage2_template.json b/data/vtlib/alignment_wtsi_stage2_template.json index 940d17f12..ddb8cb477 100644 --- a/data/vtlib/alignment_wtsi_stage2_template.json +++ b/data/vtlib/alignment_wtsi_stage2_template.json @@ -1,5 +1,5 @@ { -"description":"Process RNASeq data in BAM files within NPG Pipeline producing WTSI DNAP Sequencing Informatics BAM output: full PG history, complete SQ lines, adapter marking, recoding indexing sequence etc", +"description":"Process DNA (or RNA) seq data in BAM files within second stage of NPG Pipeline producing WTSI DNAP Sequencing Informatics output", "nodes":[ { "id":"src_bam", @@ -30,8 +30,9 @@ "id":"tee0", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"mbuffer -f -o __PHIX_ALN__ -o __TGT_ALN__ " + "use_STDOUT": false, + "comment":"large wait (500 minutes) to avoid unnecessary spill to disk", + "cmd":"teepot -w 30000 -m 1G __PHIX_ALN_OUT__ __TGT_ALN_OUT__" }, { "id":"pre_alignment_target", @@ -84,15 +85,15 @@ "description":"subgraph containing alignment process" }, { - "id":"picard_reference_dict", + "id":"reference_dict", "type":"INFILE", - "name":{"subst_param_name":"picard_reference_dict", + "name":{"subst_param_name":"reference_dict", "required":"yes", "subst_constructor":{ "vals":[ {"subst_param_name":"reposdir","required":"no","default":"."}, "/", - {"subst_param_name":"picard_reference_dict_name","required":"yes"} + {"subst_param_name":"reference_dict_name","required":"yes"} ], "postproc":{"op":"concat", "pad":""} } @@ -240,7 +241,7 @@ { "id":"final_output_prep_target", "type":"VTFILE", - "subst_map":{"phix_or_target":"target","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, + "subst_map":{"phix_or_target":"","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, "name":{"subst_param_name":"final_output_prep_target", "required":"yes", "subst_constructor":{ @@ -274,7 +275,7 @@ { "id":"final_output_prep_phix", "type":"VTFILE", - "subst_map":{"phix_or_target":"phix","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, + "subst_map":{"phix_or_target":"_phix","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, "name":{"subst_param_name":"final_output_prep_phix", "required":"yes", "subst_constructor":{ @@ -338,6 +339,22 @@ } } }, + { + "id":"cram_md5", + "type":"OUTFILE", + "name":{"subst_param_name":"cram_md5", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + ".cram.md5" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, { "id":"phix_cram", "type":"OUTFILE", @@ -354,6 +371,22 @@ } } }, + { + "id":"phix_cram_md5", + "type":"OUTFILE", + "name":{"subst_param_name":"phix_cram_md5", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_phix.cram.md5" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, { "id":"out_bamcheck", "type":"OUTFILE", @@ -370,6 +403,38 @@ } } }, + { + "id":"out_stats_F0x900", + "type":"OUTFILE", + "name":{"subst_param_name":"out_stats_F0x900", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_F0x900.stats" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, + { + "id":"out_stats_F0xB00", + "type":"OUTFILE", + "name":{"subst_param_name":"out_stats_F0xB00", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_F0xB00.stats" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, { "id":"out_phix_bamcheck", "type":"OUTFILE", @@ -386,6 +451,39 @@ } } }, + { + "id":"out_phix_stats_F0x900", + "type":"OUTFILE", + "name":{"subst_param_name":"out_phix_stats_F0x900", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_phix_F0x900.stats" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, + { + "id":"out_phix_stats_F0xB00", + "type":"OUTFILE", + "name":{"subst_param_name":"out_phix_stats_F0xB00", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_phix_F0xB00.stats" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, + { "id":"out_flagstat", "type":"OUTFILE", @@ -434,7 +532,39 @@ } }, "description":"subgraph containing seqchksum validation of outputs" - } + }, + { + "id":"phix_chksum", + "type":"OUTFILE", + "name":{"subst_param_name":"phix_chksum", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + "_phix.seqchksum" + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, + { + "id":"target_chksum", + "type":"OUTFILE", + "name":{"subst_param_name":"target_chksum", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + ".seqchksum" + ], + "postproc":{"op":"concat", "pad":""} + } + } + } ], "edges":[ { @@ -448,8 +578,8 @@ "to":"tee0" }, { - "id":"t0_to_bamcollate2_ranking", - "from":"tee0:__PHIX_ALN__", + "id":"t0_to_bamcollate2_ranking_phix", + "from":"tee0:__PHIX_ALN_OUT__", "to":"bamrecompress_input" }, { @@ -469,7 +599,7 @@ }, { "id":"t0_to_bamcollate2_ranking", - "from":"tee0:__TGT_ALN__", + "from":"tee0:__TGT_ALN_OUT__", "to":"pre_alignment_target" }, { @@ -488,9 +618,9 @@ "to":"post_alignment_target" }, { - "id":"picard_reference_dict_to_post_alignment", - "from":"picard_reference_dict", - "to":"post_alignment_target:reference_picard_dict" + "id":"reference_dict_to_post_alignment", + "from":"reference_dict", + "to":"post_alignment_target:reference_dict" }, { "id":"post_alignment_to_alignment_filter", @@ -513,7 +643,7 @@ "to":"final_output_prep_target" }, { - "id":"af_to_paf_target", + "id":"af_to_paf_phix", "from":"alignment_filter:__PHIX_OUTBAM__", "to":"final_output_prep_phix" }, @@ -533,7 +663,7 @@ "to":"target_bam" }, { - "id":"paf_to_bam", + "id":"paf_to_bam_phix", "from":"final_output_prep_phix", "to":"phix_bam" }, @@ -542,21 +672,53 @@ "from":"final_output_prep_target:out_cram", "to":"cram" }, + { + "id":"scramble_to_cram_md5", + "from":"final_output_prep_target:out_cram_md5", + "to":"cram_md5" + }, { "id":"scramble_to_phix_cram", "from":"final_output_prep_phix:out_cram", "to":"phix_cram" }, { - "id":"bamcheck_to_out", + "id":"scramble_to_phix_cram_md5", + "from":"final_output_prep_phix:out_cram_md5", + "to":"phix_cram_md5" + }, + { + "id":"bamcheck_out_target", "from":"final_output_prep_target:out_bamcheck", "to":"out_bamcheck" }, { - "id":"bamcheck_to_phix", + "id":"samtools_stats_F0x900_out_target", + "from":"final_output_prep_target:out_stats_F0x900", + "to":"out_stats_F0x900" + }, + { + "id":"samtools_stats_F0xB00_out_target", + "from":"final_output_prep_target:out_stats_F0xB00", + "to":"out_stats_F0xB00" + }, + + { + "id":"bamcheck_out_phix", "from":"final_output_prep_phix:out_bamcheck", "to":"out_phix_bamcheck" }, + { + "id":"samtools_stats_F0x900_out_phix", + "from":"final_output_prep_phix:out_stats_F0x900", + "to":"out_phix_stats_F0x900" + }, + { + "id":"samtools_stats_F0xB00_out_phix", + "from":"final_output_prep_phix:out_stats_F0xB00", + "to":"out_phix_stats_F0xB00" + }, + { "id":"flagstat_to_out", "from":"final_output_prep_target:out_flagstat", @@ -581,6 +743,16 @@ "id":"phix_bam_to_seqchksum", "from":"phix_bam", "to":"seqchksum:phix_bam" - } + }, + { + "id":"target_chksum_to_file", + "from":"seqchksum:target_chksum", + "to":"target_chksum" + }, + { + "id":"phix_chksum_to_file", + "from":"seqchksum:phix_chksum", + "to":"phix_chksum" + } ] } diff --git a/data/vtlib/bwa_aln_alignment.json b/data/vtlib/bwa_aln_alignment.json index 0829b5c0f..3bef77d63 100644 --- a/data/vtlib/bwa_aln_alignment.json +++ b/data/vtlib/bwa_aln_alignment.json @@ -112,16 +112,6 @@ "from":"bwa_aln_start_bam", "to":"bwa_aln_2:__BAM_IN__" }, - { - "id":"ref_to_bwa_aln_1", - "from":"reference_fasta:__ALN1_REF__", - "to":"bwa_aln_1:__REFERENCE_GENOME_FASTA__" - }, - { - "id":"ref_to_bwa_aln2", - "from":"reference_fasta:__ALN2_REF__", - "to":"bwa_aln_2:__REFERENCE_GENOME_FASTA__" - }, { "id":"bwa_sampe_bam1", "from":"bwa_aln_start_bam", @@ -142,11 +132,6 @@ "from":"bwa_aln_2", "to":"bwa_sampe:__SAI_2__" }, - { - "id":"ref_to_sampe", - "from":"reference_fasta:__SAMPE_REF__", - "to":"bwa_sampe:__REFERENCE_GENOME_FASTA__" - }, { "id":"bwa_sampe_to_scramble", "from":"bwa_sampe", diff --git a/data/vtlib/final_output_prep.json b/data/vtlib/final_output_prep.json index c8dd3a637..c5c7090f1 100644 --- a/data/vtlib/final_output_prep.json +++ b/data/vtlib/final_output_prep.json @@ -8,8 +8,11 @@ }, "outputs":{ "_stdout_":"bamrecompress", - "out_cram":"scramble", + "out_cram":"scramble_tee:__CRAM_OUT__", + "out_cram_md5":"scramble_md5", "out_bamcheck":"bamcheck", + "out_stats_F0x900":"samtools_stats_F0x900", + "out_stats_F0xB00":"samtools_stats_F0xB00", "out_flagstat":"flagstat" } } @@ -18,6 +21,8 @@ { "id":"bamsort_coord", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":{"subst_param_name":"bamsort_coord", "required":"yes", "subst_constructor":{ @@ -38,7 +43,6 @@ {"subst_param_name":"bstmp","required":"yes"}, "_", {"subst_param_name":"rpt","required":"yes"}, - "_", {"subst_param_name":"phix_or_target","required":"yes"}, ".tmp" ], @@ -52,12 +56,15 @@ }, { "id":"bammarkduplicates", + "comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":{"subst_param_name":"bammarkduplicates", "required":"yes", "subst_constructor":{ "vals":[ - {"subst_param_name":"bmd_cmd","required":"no","default":"bammarkduplicates2"}, + {"subst_param_name":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"}, " level=0", "verbose=0", {"subst_param_name":"bmd_tmpfile_flag", @@ -70,7 +77,6 @@ {"subst_param_name":"bmdtmp","required":"yes"}, "_", {"subst_param_name":"rpt","required":"yes"}, - "_", {"subst_param_name":"phix_or_target","required":"yes"}, ".tmp" ], @@ -85,7 +91,6 @@ {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", {"subst_param_name":"rpt","required":"yes"}, - "_", {"subst_param_name":"phix_or_target","required":"yes"}, ".markdups_metrics.txt" ], @@ -100,19 +105,78 @@ { "id":"bmd_multiway", "type":"EXEC", - "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__" + "use_STDIN": true, + "use_STDOUT": false, + "cmd":"teepot -w 300 __SCRAMBLE_OUT__ __BAMCHECK_OUT__ __FLAGSTAT_OUT__ __CALIBRATION_PU_OUT__ __BAM_OUT__ __SAMTOOLS_STATS_F0x900__ __SAMTOOLS_STATS_F0xB00__" }, { "id":"scramble", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":"scramble -I bam -O cram -r __REFERENCE_GENOME_FASTA__" }, - + { + "id":"scramble_tee", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":"teepot -w 30000 __CRAM_OUT__ __MD5_OUT__", + "comment":"allow a generous 500 minutes for the teepot timeout" + }, + { + "id":"scramble_md5", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"md5sum" + }, { "id":"bamcheck", "type":"EXEC", - "cmd":"bamcheck" + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"bamcheck -F 0x900" + }, + { + "id":"samtools_stats_F0x900", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":{"subst_param_name":"samtools_stats_F0x900", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "stats", + "-F", + {"subst_param_name":"stats_filter__F0x900","required":"no","default":"0x900"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } }, + { + "id":"samtools_stats_F0xB00", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":{"subst_param_name":"samtools_stats_F0xB00", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "stats", + "-F", + {"subst_param_name":"stats_filter__F0xB00","required":"no","default":"0xB00"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } + }, + { "id":"calibration_pu", "type":"EXEC", @@ -131,6 +195,7 @@ {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"phix_or_target","required":"yes"}, {"subst_param_name":"bam_ext","required":"no","default":".bam"} ], "postproc":{"op":"concat", "pad":""} @@ -146,6 +211,8 @@ { "id":"bamrecompress", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":{"subst_param_name":"bamrecompress", "required":"yes", "subst_constructor":{ @@ -160,7 +227,9 @@ "indexfilename=", {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", - {"subst_param_name":"rpt","required":"yes"}, ".bai" + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"phix_or_target","required":"no","default":""}, + ".bai" ], "postproc":{"op":"concat", "pad":""} } @@ -173,7 +242,9 @@ "md5filename=", {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", - {"subst_param_name":"rpt","required":"yes"}, ".bam.md5" + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"phix_or_target","required":"no","default":""}, + ".bam.md5" ], "postproc":{"op":"concat", "pad":""} } @@ -199,7 +270,6 @@ {"subst_param_name":"brtmp","required":"yes"}, "_", {"subst_param_name":"rpt","required":"yes"}, - "_", {"subst_param_name":"phix_or_target","required":"yes"}, ".tmp" ], @@ -214,13 +284,40 @@ { "id":"flagstat_filter", "type":"EXEC", - "cmd":"samtools view -u -F 0x900 -", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":{"subst_param_name":"samtools_flagstat_filter", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "view", + "-u", + "-F", + {"subst_param_name":"flagstats_filter_flag","required":"no","default":"0x900"}, + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + }, "description":"Filter out secondary and supplementary alignment records" }, { "id":"flagstat", "type":"EXEC", - "cmd":"samtools flagstat -" + "use_STDIN": true, + "use_STDOUT": true, + "cmd":{"subst_param_name":"samtools_flagstat_cmd", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "flagstat", + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } } ], "edges":[ @@ -235,15 +332,35 @@ "to":"bmd_multiway" }, { - "id":"bmd_to_scramble", + "id":"bmdmw_to_scramble", "from":"bmd_multiway:__SCRAMBLE_OUT__", "to":"scramble" }, + { + "id":"scramble_to_scramble_tee", + "from":"scramble", + "to":"scramble_tee" + }, + { + "id":"scramble_tee_to_md5", + "from":"scramble_tee:__MD5_OUT__", + "to":"scramble_md5" + }, { "id":"bmd_to_bamcheck", "from":"bmd_multiway:__BAMCHECK_OUT__", "to":"bamcheck" }, + { + "id":"bmd_to_sts_F0x900", + "from":"bmd_multiway:__SAMTOOLS_STATS_F0x900__", + "to":"samtools_stats_F0x900" + }, + { + "id":"bmd_to_sts_F0xB00", + "from":"bmd_multiway:__SAMTOOLS_STATS_F0xB00__", + "to":"samtools_stats_F0xB00" + }, { "id":"bmd_to_calibration_pu", "from":"bmd_multiway:__CALIBRATION_PU_OUT__", diff --git a/data/vtlib/post_alignment.json b/data/vtlib/post_alignment.json index 1b4d45c68..a37e339d1 100644 --- a/data/vtlib/post_alignment.json +++ b/data/vtlib/post_alignment.json @@ -4,7 +4,7 @@ "ports":{ "inputs":{ "_stdin_":"tee_headerSQfix", - "reference_picard_dict":"alterSQ_headerSQfix:__IN_PICARD_DICT__", + "reference_dict":"alterSQ_headerSQfix:__IN_PICARD_DICT__", "no_align_bam":"bam12auxmerge:__NO_ALN_BAM__" }, "outputs":{ @@ -17,8 +17,8 @@ "id":"tee_headerSQfix", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"mbuffer -f -q -m 5M -o - -o __OUT1__", + "use_STDOUT": false, + "cmd":"teepot -w 300 -m 5M __HEADER_FIX_OUT__ __FULL_BAM_OUT__", "comment":"get deadlock when tee used here" }, { @@ -26,7 +26,18 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"samtools view -h -" + "cmd":{"subst_param_name":"samtools_view", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "view", + "-h", + "-" + ], + "postproc":{"op":"pack","pad":" "} + } + } }, { "id":"alterSQ_headerSQfix", @@ -42,14 +53,25 @@ "type":"EXEC", "use_STDIN": true, "use_STDOUT": true, - "cmd":"mbuffer -f -q -m 5M" + "cmd":"teepot -m 5M -" }, { "id":"reheader_headerSQfix", "type":"EXEC", "use_STDIN": false, "use_STDOUT": true, - "cmd":"samtools reheader __IN_SAMHEADER__ __IN_BAM__" + "cmd":{"subst_param_name":"samtools_reheader", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "reheader", + "__IN_SAMHEADER__", + "__IN_BAM__" + ], + "postproc":{"op":"pack","pad":" "} + } + } }, { "id":"bam12split", @@ -76,7 +98,7 @@ "edges":[ { "id":"tee_headerSQfix_to_sam", - "from":"tee_headerSQfix:__OUT1__", + "from":"tee_headerSQfix:__HEADER_FIX_OUT__", "to":"sam_headerSQfix" }, { @@ -91,7 +113,7 @@ }, { "id":"tee_headerSQfix_to_mbuffer", - "from":"tee_headerSQfix", + "from":"tee_headerSQfix:__FULL_BAM_OUT__", "to":"mbuffer_headerSQfix" }, { diff --git a/data/vtlib/realignment_wtsi_template.json b/data/vtlib/realignment_wtsi_template.json index 174ab7fe9..c23372290 100644 --- a/data/vtlib/realignment_wtsi_template.json +++ b/data/vtlib/realignment_wtsi_template.json @@ -30,8 +30,8 @@ "id":"tee0", "type":"EXEC", "use_STDIN": true, - "use_STDOUT": true, - "cmd":"mbuffer -f -o __PHIX_ALN__ -o __TGT_ALN__ " + "use_STDOUT": false, + "cmd":"teepot __PHIX_ALN_OUT__ __TGT_ALN_OUT__ " }, { "id":"pre_alignment_target", @@ -84,15 +84,15 @@ "description":"subgraph containing alignment process" }, { - "id":"picard_reference_dict", + "id":"reference_dict", "type":"INFILE", - "name":{"subst_param_name":"picard_reference_dict", + "name":{"subst_param_name":"reference_dict", "required":"yes", "subst_constructor":{ "vals":[ {"subst_param_name":"reposdir","required":"no","default":"."}, "/", - {"subst_param_name":"picard_reference_dict_name","required":"yes"} + {"subst_param_name":"reference_dict_name","required":"yes"} ], "postproc":{"op":"concat", "pad":""} } @@ -293,8 +293,8 @@ "to":"tee0" }, { - "id":"t0_to_bamcollate2_ranking", - "from":"tee0:__PHIX_ALN__", + "id":"t0_to_bamrecompress_input", + "from":"tee0:__PHIX_ALN_OUT__", "to":"bamrecompress_input" }, { @@ -313,8 +313,8 @@ "to":"post_alignment_target:no_align_bam" }, { - "id":"t0_to_bamcollate2_ranking", - "from":"tee0:__TGT_ALN__", + "id":"t0_to_pre_alignment_target", + "from":"tee0:__TGT_ALN_OUT__", "to":"pre_alignment_target" }, { @@ -333,9 +333,9 @@ "to":"post_alignment_target" }, { - "id":"picard_reference_dict_to_post_alignment", - "from":"picard_reference_dict", - "to":"post_alignment_target:reference_picard_dict" + "id":"reference_dict_to_post_alignment", + "from":"reference_dict", + "to":"post_alignment_target:reference_dict" }, { "id":"post_alignment_to_alignment_filter", diff --git a/data/vtlib/seqchksum.json b/data/vtlib/seqchksum.json index ce1011eab..37bc8f454 100644 --- a/data/vtlib/seqchksum.json +++ b/data/vtlib/seqchksum.json @@ -4,35 +4,111 @@ "ports":{ "inputs":{ "_stdin_":"seqchksum_input", - "target_bam":"bamcat_output:__TARGETBAM__", - "phix_bam":"bamcat_output:__PHIXBAM__" + "target_bam":"target_tee", + "phix_bam":"phix_tee" + }, + "outputs":{ + "target_chksum":"seqchksum_target", + "phix_chksum":"seqchksum_phix" } } }, "nodes":[ + { + "id":"target_tee", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":"teepot __BAMCAT_OUT__ __CHKSUM_OUT__" + }, + { + "id":"phix_tee", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": false, + "cmd":"teepot __BAMCAT_OUT__ __CHKSUM_OUT__" + }, + { + "id":"seqchksum_target", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"bamseqchksum" + }, + { + "id":"seqchksum_phix", + "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, + "cmd":"bamseqchksum" + }, { "id":"bamcat_output", + "comment":"changed from bamcat when deadlock occurred when using new approach of saving chksum files", "type":"EXEC", - "cmd":"bamcat verbose=0 level=0 __PHIXBAM__ __TARGETBAM__" + "use_STDIN": false, + "use_STDOUT": true, + "cmd":{"subst_param_name":"samtools_merge", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"samtools_executable","required":"no","default":"samtools"}, + "merge", + "-c", + "--", + "-", + "__TARGETBAM_IN__", + "__PHIXBAM_IN__" + ], + "postproc":{"op":"pack","pad":" "} + } + } }, { "id":"seqchksum_output", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":"bamseqchksum" }, { "id":"seqchksum_input", "type":"EXEC", + "use_STDIN": true, + "use_STDOUT": true, "cmd":"bamseqchksum" }, { "id":"cmp_seqchksum", "type":"EXEC", - "cmd":"cmp -s __INPUTCHK__ __OUTPUTCHK__", + "use_STDIN": false, + "use_STDOUT": false, + "cmd":"cmp -s __INPUTCHK_IN__ __OUTPUTCHK_IN__", "description":"check input primary/sequence data matches output" } ], "edges":[ + { + "id":"phix_to_chksum", + "from":"phix_tee:__CHKSUM_OUT__", + "to":"seqchksum_phix" + }, + { + "id":"phix_to_bamcat", + "from":"phix_tee:__BAMCAT_OUT__", + "to":"bamcat_output:__PHIXBAM_IN__" + }, + { + "id":"target_to_chksum", + "from":"target_tee:__CHKSUM_OUT__", + "to":"seqchksum_target" + }, + { + "id":"target_to_bamcat", + "from":"target_tee:__BAMCAT_OUT__", + "to":"bamcat_output:__TARGETBAM_IN__" + }, + { "id":"bamcat_to_chk_output", "from":"bamcat_output", @@ -41,12 +117,12 @@ { "id":"seqchksum_output_to_cmp", "from":"seqchksum_output", - "to":"cmp_seqchksum:__OUTPUTCHK__" + "to":"cmp_seqchksum:__OUTPUTCHK_IN__" }, { "id":"seqchksum_input_to_cmp", "from":"seqchksum_input", - "to":"cmp_seqchksum:__INPUTCHK__" + "to":"cmp_seqchksum:__INPUTCHK_IN__" } ] } diff --git a/examples/bwa_mem/bwa_mem_alignment.vtf b/examples/bwa_mem/bwa_mem_alignment.vtf index 40a388c86..6d6d91f6a 100644 --- a/examples/bwa_mem/bwa_mem_alignment.vtf +++ b/examples/bwa_mem/bwa_mem_alignment.vtf @@ -24,7 +24,7 @@ "required":"yes", "subst_constructor":{ "vals":[ - "bwa0_6", + {"subst_param_name":"bwa_executable","required":"no","default":"bwa0_6"}, "mem", "-t", {"subst_param_name":"aligner_numthreads","required":"no","default":"2"}, diff --git a/examples/bwa_mem/generic_alignment_with_phix.vtf b/examples/bwa_mem/generic_alignment_with_phix.vtf index 59681a90c..456032cb3 100644 --- a/examples/bwa_mem/generic_alignment_with_phix.vtf +++ b/examples/bwa_mem/generic_alignment_with_phix.vtf @@ -41,25 +41,25 @@ } }, {"subst_param_name":"i2b_first_tile_flag", - "required":"no", - "subst_constructor":{ - "vals":[ - "FIRST_TILE", - {"subst_param_name":"i2b_first_tile","required":"no"} - ], - "postproc":{"op":"concat","pad":"="} - } - }, - {"subst_param_name":"i2b_tile_limit_flag", - "required":"no", - "subst_constructor":{ - "vals":[ - "TILE_LIMIT", - {"subst_param_name":"i2b_tile_limit","required":"no"} - ], - "postproc":{"op":"concat","pad":"="} - } - }, + "required":"no", + "subst_constructor":{ + "vals":[ + "FIRST_TILE", + {"subst_param_name":"i2b_first_tile","required":"no"} + ], + "postproc":{"op":"concat","pad":"="} + } + }, + {"subst_param_name":"i2b_tile_limit_flag", + "required":"no", + "subst_constructor":{ + "vals":[ + "TILE_LIMIT", + {"subst_param_name":"i2b_tile_limit","required":"no"} + ], + "postproc":{"op":"concat","pad":"="} + } + }, "OUTPUT=/dev/stdout", {"subst_param_name":"i2b_lane_flag", "required":"yes", @@ -104,6 +104,8 @@ "subst_constructor":{ "vals":[ "bamadapterfind level=0 md5=1 md5filename=", + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", {"subst_param_name":"rpt","required":"yes"}, ".bam.md5" ], @@ -188,23 +190,23 @@ }, "description":"[path to] reference genome for alignment step in appropriate format" }, - { - "id":"alignment_target", - "type":"VTFILE", - "name":{"subst_param_name":"alignment_vtf", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst_param_name":"cfgdatadir","required":"no","default":"."}, - "/", - {"subst_param_name":"alignment_method","required":"yes"}, - "_alignment.json" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - "description":"subgraph containing alignment process" - }, + { + "id":"alignment_target", + "type":"VTFILE", + "name":{"subst_param_name":"alignment_vtf", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"cfgdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"alignment_method","required":"yes"}, + "_alignment.json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "description":"subgraph containing alignment process" + }, { "id":"alignment_reference_genome_phix", "type":"INFILE", @@ -221,23 +223,23 @@ }, "description":"[path to] phix reference genome for alignment step in appropriate format" }, - { - "id":"alignment_phix", - "type":"VTFILE", - "name":{"subst_param_name":"alignment_vtf", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst_param_name":"cfgdatadir","required":"no","default":"."}, - "/", - {"subst_param_name":"alignment_method","required":"yes"}, - "_alignment.json" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - "description":"subgraph containing alignment process" - }, + { + "id":"alignment_phix", + "type":"VTFILE", + "name":{"subst_param_name":"alignment_vtf", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"cfgdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"alignment_method","required":"yes"}, + "_alignment.json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "description":"subgraph containing alignment process" + }, { "id":"target_cram", "type":"OUTFILE", @@ -264,7 +266,7 @@ {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", {"subst_param_name":"rpt","required":"yes"}, - {"subst_param_name":"bamcheck_name","required":"no","default":".bamcheck"} + {"subst_param_name":"bamcheck_name","required":"no","default":".bamstats"} ], "postproc":{"op":"concat", "pad":""} } @@ -348,7 +350,7 @@ {"subst_param_name":"outdatadir","required":"no","default":"."}, "/", {"subst_param_name":"rpt","required":"yes"}, - {"subst_param_name":"phix_bamcheck_name","required":"no","default":"_phix.bamcheck"} + {"subst_param_name":"phix_bamcheck_name","required":"no","default":"_phix.bamstats"} ], "postproc":{"op":"concat", "pad":""} } @@ -504,24 +506,24 @@ }, "description":"reference fasta for scramble (target)" }, - { - "id":"post_alignment_filter_target", - "type":"VTFILE", + { + "id":"post_alignment_filter_target", + "type":"VTFILE", "subst_map":{"phix_or_target":"target","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, - "name":{"subst_param_name":"post_alignment_filter_target", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst_param_name":"cfgdatadir","required":"no","default":"."}, - "/", - {"subst_param_name":"post_alignment_filter","required":"no","default":"post_alignment_filter"}, - ".json" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - "description":"subgraph containing post alignment_filter process (target)" - }, + "name":{"subst_param_name":"post_alignment_filter_target", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"cfgdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"post_alignment_filter","required":"no","default":"post_alignment_filter"}, + ".json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "description":"subgraph containing post alignment_filter process (target)" + }, ############################## # post alignment_filter (phix) ############################## @@ -541,24 +543,24 @@ }, "description":"reference fasta for scramble (phix)" }, - { - "id":"post_alignment_filter_phix", - "type":"VTFILE", + { + "id":"post_alignment_filter_phix", + "type":"VTFILE", "subst_map":{"phix_or_target":"phix","bstmp":"bspaft","brtmp":"brpaft","bmdtmp":"bmdpaft"}, - "name":{"subst_param_name":"post_alignment_filter_phix", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst_param_name":"cfgdatadir","required":"no","default":"."}, - "/", - {"subst_param_name":"post_alignment_filter","required":"no","default":"post_alignment_filter"}, - ".json" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - "description":"subgraph containing post alignment_filter process (phix)" - }, + "name":{"subst_param_name":"post_alignment_filter_phix", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"cfgdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"post_alignment_filter","required":"no","default":"post_alignment_filter"}, + ".json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "description":"subgraph containing post alignment_filter process (phix)" + }, ######################## # phiX final output bam ######################## @@ -600,23 +602,58 @@ ########### # seqchksum ########### - { - "id":"seqchksum", - "type":"VTFILE", - "name":{"subst_param_name":"seqchksum", - "required":"yes", - "subst_constructor":{ - "vals":[ - {"subst_param_name":"cfgdatadir","required":"no","default":"."}, - "/", - {"subst_param_name":"seqchksum_vtf","required":"no","default":"seqchksum"}, - ".json" - ], - "postproc":{"op":"concat", "pad":""} - } - }, - "description":"subgraph containing seqchksum validation of outputs" - } + { + "id":"seqchksum", + "type":"VTFILE", + "name":{"subst_param_name":"seqchksum", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"cfgdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"seqchksum_vtf","required":"no","default":"seqchksum"}, + ".json" + ], + "postproc":{"op":"concat", "pad":""} + } + }, + "description":"subgraph containing seqchksum validation of outputs" + }, + { + "id":"target_output_checksum", + "type":"OUTFILE", + "name":{"subst_param_name":"target_output_checksum", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"checksum_output_name", + "required":"no", + "default":"_out.bamseqchecksum" + } + ], + "postproc":{"op":"concat", "pad":""} + } + } + }, + { + "id":"target_input_checksum", + "type":"OUTFILE", + "name":{"subst_param_name":"target_input_checksum", + "required":"yes", + "subst_constructor":{ + "vals":[ + {"subst_param_name":"outdatadir","required":"no","default":"."}, + "/", + {"subst_param_name":"rpt","required":"yes"}, + {"subst_param_name":"checksum_input_name","required":"no","default":"_in.bamseqchecksum"} + ], + "postproc":{"op":"concat", "pad":""} + } + } + } ], "edges":[ { @@ -676,11 +713,11 @@ #################### # alignment (target) #################### - { - "id":"reference_genome_to_alignment_target", - "from":"alignment_reference_genome_target", - "to":"alignment_target:reference" - }, + { + "id":"reference_genome_to_alignment_target", + "from":"alignment_reference_genome_target", + "to":"alignment_target:reference" + }, { "id":"ta_to_alignment_target", "from":"teepot_aln:__TARGET__", @@ -689,11 +726,11 @@ #################### # alignment (phix) #################### - { - "id":"reference_genome_to_alignment_phix", - "from":"alignment_reference_genome_phix", - "to":"alignment_phix:reference" - }, + { + "id":"reference_genome_to_alignment_phix", + "from":"alignment_reference_genome_phix", + "to":"alignment_phix:reference" + }, { "id":"ta_to_alignment_target", "from":"teepot_aln:__PHIX__", @@ -702,33 +739,33 @@ ######################### # post_alignment (target) ######################### - { - "id":"alignment_to_post_alignment_target", - "from":"alignment_target", - "to":"post_alignment_target", + { + "id":"alignment_to_post_alignment_target", + "from":"alignment_target", + "to":"post_alignment_target", "comment":"freshly aligned data to post-alignment processing" }, - { - "id":"picard_ref_dict_to_post_alignment_target", - "from":"reference_picard_dict_target", - "to":"post_alignment_target:reference_picard_dict", + { + "id":"picard_ref_dict_to_post_alignment_target", + "from":"reference_picard_dict_target", + "to":"post_alignment_target:reference_picard_dict", "comment":"used by headerSQfix in post_alignment" - }, + }, ####################### # post_alignment (phix) ####################### - { - "id":"alignment_to_post_alignment_phix", - "from":"alignment_phix", - "to":"post_alignment_phix", + { + "id":"alignment_to_post_alignment_phix", + "from":"alignment_phix", + "to":"post_alignment_phix", "comment":"freshly aligned data to post-alignment processing" }, - { - "id":"picard_ref_dict_to_post_alignment_phix", - "from":"reference_picard_dict_phix", - "to":"post_alignment_phix:reference_picard_dict", + { + "id":"picard_ref_dict_to_post_alignment_phix", + "from":"reference_picard_dict_phix", + "to":"post_alignment_phix:reference_picard_dict", "comment":"used by headerSQfix in post_alignment" - }, + }, ########################## # AlignmentFilter (inputs) ########################## @@ -842,21 +879,30 @@ ######################### # validation by seqchksum ######################### - { - "id":"src_bam_to_seqchksum", - "from":"tee0:__CHKSUMOUT__", - "to":"seqchksum" - }, - { - "id":"target_bam_to_seqchksum", - "from":"target_bam", - "to":"seqchksum:target_bam" - }, - { - "id":"phix_bam_to_seqchksum", - "from":"phix_bam", - "to":"seqchksum:phix_bam" - } - + { + "id":"src_bam_to_seqchksum", + "from":"tee0:__CHKSUMOUT__", + "to":"seqchksum" + }, + { + "id":"target_bam_to_seqchksum", + "from":"target_bam", + "to":"seqchksum:target_bam" + }, + { + "id":"phix_bam_to_seqchksum", + "from":"phix_bam", + "to":"seqchksum:phix_bam" + }, + { + "id":"seqchksum_output_to_target_checksum", + "from":"seqchksum:output_checksum", + "to":"target_output_checksum" + }, + { + "id":"seqchksum_input_to_target_checksum", + "from":"seqchksum:input_checksum", + "to":"target_input_checksum" + } ] } diff --git a/examples/bwa_mem/post_alignment_filter.vtf b/examples/bwa_mem/post_alignment_filter.vtf index fe6ac0cca..06e395de8 100644 --- a/examples/bwa_mem/post_alignment_filter.vtf +++ b/examples/bwa_mem/post_alignment_filter.vtf @@ -95,7 +95,7 @@ { "id":"bamcheck", "type":"EXEC", - "cmd":"bamcheck" + "cmd":"samtools stats" }, { "id":"bamrecompress", diff --git a/examples/bwa_mem/seqchksum.vtf b/examples/bwa_mem/seqchksum.vtf index 0be285c0e..a04894cfe 100644 --- a/examples/bwa_mem/seqchksum.vtf +++ b/examples/bwa_mem/seqchksum.vtf @@ -1,72 +1,75 @@ { "description":"steps in the alignment pipeline perform a checksum-based comparison of input and output (bam) data. Final validation step in alignment pipeline", "subgraph_io":{ - "ports":{ - "inputs":{ - "_stdin_":"seqchksum_input", - "target_bam":"bamcat_output:__TARGETBAM__", - "phix_bam":"bamcat_output:__PHIXBAM__" - } - } + "ports":{ + "inputs":{ + "_stdin_":"seqchksum_input", + "target_bam":"bamcat_output:__TARGETBAM__", + "phix_bam":"bamcat_output:__PHIXBAM__" + }, + "outputs":{"output_checksum":"teepot_output_seqchksum", + "input_checksum":"teepot_input_seqchksum" + } + } }, "nodes":[ - { - "id":"bamcat_output", - "type":"EXEC", - "cmd":"bamcat verbose=0 level=0 __PHIXBAM__ __TARGETBAM__" - }, - { - "id":"seqchksum_output", - "type":"EXEC", - "cmd":"bamseqchksum" - }, - { - "id":"seqchksum_input", - "type":"EXEC", - "cmd":"bamseqchksum" - }, - { - "id":"teepot_input_seqchksum", - "type":"EXEC", - "cmd":"teepot __CHKSUMOUT__" - }, - { - "id":"teepot_output_seqchksum", - "type":"EXEC", - "cmd":"teepot __CHKSUMOUT__" - }, - { - "id":"cmp_seqchksum", - "type":"EXEC", - "cmd":"cmp -s __INPUTCHK__ __OUTPUTCHK__", - "description":"check input primary/sequence data matches output" - } + { + "id":"bamcat_output", + "type":"EXEC", + "cmd":"bamcat verbose=0 level=0 __PHIXBAM__ __TARGETBAM__" + }, + { + "id":"seqchksum_output", + "type":"EXEC", + "cmd":"bamseqchksum" + }, + { + "id":"seqchksum_input", + "type":"EXEC", + "cmd":"bamseqchksum" + }, + { + "id":"teepot_input_seqchksum", + "type":"EXEC", + "cmd":"teepot __CHKSUMOUT__ -" + }, + { + "id":"teepot_output_seqchksum", + "type":"EXEC", + "cmd":"teepot __CHKSUMOUT__ -" + }, + { + "id":"cmp_seqchksum", + "type":"EXEC", + "cmd":"cmp -s __INPUTCHK__ __OUTPUTCHK__", + "description":"check input primary/sequence data matches output" + } ], "edges":[ - { - "id":"bamcat_to_chk_output", - "from":"bamcat_output", - "to":"seqchksum_output" - }, - { - "id":"seqchksum_output_to_cmp", - "from":"seqchksum_output", - "to":"teepot_output_seqchksum" - }, - { - "id":"seqchksum_output_to_cmp", - "from":"teepot_output_seqchksum:__CHKSUMOUT__", - "to":"cmp_seqchksum:__OUTPUTCHK__" - }, - { - "id":"seqchksum_input_to_cmp", - "from":"seqchksum_input", - "to":"teepot_input_seqchksum" - }, - { - "id":"seqchksum_input_to_cmp", - "from":"teepot_input_seqchksum:__CHKSUMOUT__", - "to":"cmp_seqchksum:__INPUTCHK__" - } + { + "id":"bamcat_to_chk_output", + "from":"bamcat_output", + "to":"seqchksum_output" + }, + { + "id":"seqchksum_output_to_cmp", + "from":"seqchksum_output", + "to":"teepot_output_seqchksum" + }, + { + "id":"seqchksum_output_to_cmp", + "from":"teepot_output_seqchksum:__CHKSUMOUT__", + "to":"cmp_seqchksum:__OUTPUTCHK__" + }, + { + "id":"seqchksum_input_to_cmp", + "from":"seqchksum_input", + "to":"teepot_input_seqchksum" + }, + { + "id":"seqchksum_input_to_cmp", + "from":"teepot_input_seqchksum:__CHKSUMOUT__", + "to":"cmp_seqchksum:__INPUTCHK__" + } ] }