Skip to content

Commit

Permalink
Switched to use samtools merge and samtools markdup (#281)
Browse files Browse the repository at this point in the history
* Switched to use samtools merge and samtools markdup

* -c option added to samtools markdup to avoid some optical duplicate count errors

---------

Co-authored-by: ces <[email protected]>
  • Loading branch information
jidur and ces authored Feb 14, 2025
1 parent 6efdc7b commit 7e708ca
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 12 deletions.
55 changes: 43 additions & 12 deletions data/vtlib/merge_aligned.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,27 @@
}
},
{"id":"bmd_cmd","required":"no","default":"bamstreamingmarkduplicates"},
{
"id": "stmd_metrics_file",
"required": "no",
"comment": "path of the stats/metrics file written by samtools markdup (passed via -f); samtools markdup is now the standard tool",
"subst_constructor": {
"vals": [
{
"subst": "outdatadir"
},
"/",
{
"subst": "library"
},
".markdups_metrics.txt"
],
"postproc": {
"op": "concat",
"pad": ""
}
}
},
{
"id":"incrams_seqchksum",
"required":"yes",
Expand All @@ -65,22 +86,32 @@
}
],
"nodes": [
{
"id":"bammarkduplicates",
"comment":"default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
"type":"EXEC",
"use_STDIN": true,
"use_STDOUT": true,
"orig_cmd":{"subst":"bammarkduplicates"},
"cmd": [ {"subst":"bmd_cmd"}, "level=0", "verbose=0", {"subst":"bmd_tmpfile_flag"}, {"subst":"bmd_metrics_file_flag"}, {"subst":"bmd_resetdupflag"} ]
},
{
"id": "markdup",
"description": "mark duplicates method",
"type": "VTFILE",
"comment": "inputs: _stdin_ (merged bam from crammerge); outputs: _stdout_ (bam with duplicates marked)",
"node_prefix_tba": "merged_markdup_",
"name": {
"select": "markdup_method",
"required": true,
"select_range": [
1
],
"default": "samtools",
"cases": {
"samtools": "merged_markdup_samtools.json",
"biobambam": "merged_markdup_biobambam.json"
}
}
},
{
"id":"crammerge",
"type":"EXEC",
"use_STDIN": false,
"use_STDOUT": true,
"orig_cmd":{"subst":"crammerge"},
"cmd": [ "samtools","merge","-O","BAM","-l","0","-",{"subst":"incrams"} ],
"cmd": [ {"subst":"samtools_executable"}, "merge", "-O", "BAM", "-l", "0", "--input-fmt-option", "no_ref=1", "-",{"subst":"incrams"} ],
"description":"merge individual cram files from a sample into one cram file"
},
{
Expand Down Expand Up @@ -128,8 +159,8 @@
}
],
"edges": [
{ "id": "crammerge_to_bammarkduplicates", "from": "crammerge", "to": "bammarkduplicates" },
{ "id": "bammarkduplicates_to_final_output_prep", "from": "bammarkduplicates", "to": "merge_final_output_prep" },
{ "id": "crammerge_to_markdup", "from": "crammerge", "to": "markdup" },
{ "id": "markdup_to_final_output_prep", "from": "markdup", "to": "merge_final_output_prep" },
{ "id": "merge_seqchksum_to_seqchksumdefault_tee", "from": "merge_seqchksum", "to": "seqchksumdefault_head5" },
{ "id": "seqchksumdefault_head5", "from":"seqchksumdefault_head5", "to":"cmp_seqchksumdefault:merged_seqchksum" },
{ "id": "final_output_prep_to_head5", "from": "merge_final_output_prep","to": "seqchksum_head5" },
Expand Down
20 changes: 20 additions & 0 deletions data/vtlib/merged_markdup_biobambam.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
  "version": "2.0",
  "description": "mark duplicates on a bam stream using the biobambam tool named by bmd_cmd (bamstreamingmarkduplicates by default)",
  "subgraph_io": {
    "ports": {
      "inputs": { "_stdin_": "bammarkduplicates" },
      "outputs": { "_stdout_": "bammarkduplicates" }
    }
  },
  "nodes": [
    {
      "id": "bammarkduplicates",
      "comment": "default tool bamstreamingmarkduplicates must be from Biobambam >= 0.0.174",
      "type": "EXEC",
      "use_STDIN": true,
      "use_STDOUT": true,
      "cmd": [
        {"subst":"bmd_cmd"},
        "level=0",
        "verbose=0",
        {"subst":"bmd_tmpfile_flag"},
        {"subst":"bmd_metrics_file_flag"},
        {"subst":"bmd_resetdupflag"}
      ]
    }
  ]
}
74 changes: 74 additions & 0 deletions data/vtlib/merged_markdup_samtools.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
{
"version":"2.0",
"description":"mark duplicates in a bam stream with samtools: collate, fixmate, coordinate sort, then markdup",
"subgraph_io":{
"ports":{
"inputs":{ "_stdin_":"collate" },
"outputs":{ "_stdout_":"markdup" }
}
},
"nodes":[
{
  "id":"collate",
  "type":"EXEC",
  "use_STDIN": true,
  "use_STDOUT": true,
  "comment":"thread count is tunable via collate_threads (default 4), in line with the fixmate, coord_sort and markdup nodes",
  "cmd": [
    {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "collate",
    "-l", "0",
    "--threads", {"subst":"collate_threads","required":true,"ifnull":4},
    "-O", "-"
  ]
},
{
  "id": "fixmate",
  "type": "EXEC",
  "use_STDIN": true,
  "use_STDOUT": true,
  "cmd": [
    {
      "subst": "samtools_executable",
      "required": true,
      "ifnull": "samtools"
    },
    "fixmate",
    "--threads",
    {
      "subst": "fixmate_threads",
      "required": true,
      "ifnull": 4
    },
    {
      "select": "fixmate_add_mate_score",
      "required": true,
      "select_range": [1],
      "default": "on",
      "cases": {
        "on": "-m",
        "off": []
      }
    },
    {
      "subst": "fixmate_extra_flags",
      "required": false
    },
    "-",
    "-"
  ]
},
{
  "id": "coord_sort",
  "type": "EXEC",
  "use_STDIN": true,
  "use_STDOUT": true,
  "cmd": [
    {
      "subst": "samtools_executable",
      "required": true,
      "ifnull": "samtools"
    },
    "sort",
    "-l",
    {
      "subst": "coord_sort_compression",
      "required": true,
      "ifnull": ["0"]
    },
    {
      "subst": "coord_sort_mpt_flag",
      "ifnull": {
        "subst_constructor": {
          "vals": [
            "-m",
            {
              "subst": "coord_sort_mem_per_thread",
              "required": false
            }
          ]
        }
      }
    },
    "--threads",
    {
      "subst": "coord_sort_threads",
      "required": true,
      "ifnull": 4
    },
    {
      "subst": "coord_sort_extra_flags",
      "required": false
    },
    "-"
  ]
},
{
  "id":"markdup",
  "type":"EXEC",
  "use_STDIN": true,
  "use_STDOUT": true,
  "comment":"add -T <temp_prefix> after initial tests; -c (clear previous duplicate settings) is emitted once via stmd_clear_previous, which defaults to on to avoid optical duplicate count errors",
  "cmd": [
    {"subst":"samtools_executable", "required":true, "ifnull":"samtools"}, "markdup",
    "--threads", {"subst":"markdup_threads","required":true,"ifnull":4},
    "-s",
    "-f", {"subst":"stmd_metrics_file","required":true},
    {"select":"stmd_mark_supps", "required":true, "select_range":[1], "default":"on","cases":{"on":"-S","off":[]}},
    ["-d", {"subst":"markdup_optical_distance_value","required":false,"ifnull":2500}],
    {"subst":"stmd_relaxed_flag","required":false, "ifnull":["--mode", {"subst":"stmd_dup_pos_mode","required":false,"ifnull":"s"}]},
    {"select":"stmd_label_dups", "required":true, "select_range":[1], "default":"off", "cases":{"on":"-t","off":[]}},
    {"select":"stmd_clear_previous", "required":true, "select_range":[1], "default":"on","cases":{"on":"-c","off":[]}},
    {"subst":"stmd_extra_flags", "required":false},
    "-", "-"
  ]
}
],
"edges":[
  { "id":"collate_to_fixmate", "from":"collate", "to":"fixmate" },
  { "id":"fixmate_to_coordsort", "from":"fixmate", "to":"coord_sort" },
  { "id":"coordsort_to_markdup", "from":"coord_sort", "to":"markdup" }
]
}

0 comments on commit 7e708ca

Please sign in to comment.