diff --git a/CHANGELOG.md b/CHANGELOG.md index 0945058..7451bbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ - removed an `exit 0` that would terminate runs after processing a single (set of) input file(s). +- now using 4 cores for merging multiple BAM files (more details [here](https://github.com/FelixKrueger/Bismark/issues/707)) + ### deduplicate_bismark - Changed the path to Samtools to custom variable ([#609](https://github.com/FelixKrueger/Bismark/issues/609)) diff --git a/bismark b/bismark index b3e574d..ed2b7fa 100755 --- a/bismark +++ b/bismark @@ -8,7 +8,7 @@ use Getopt::Long; use FindBin qw($RealBin); use lib "$RealBin/../lib"; -## This program is Copyright (C) 2010-23, Felix Krueger (fkrueger@altoslabs.com) +## This program is Copyright (C) 2010-24, Felix Krueger (fkrueger@altoslabs.com) ## This program is free software: you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -26,7 +26,7 @@ use lib "$RealBin/../lib"; my $parent_dir = getcwd(); my $bismark_version = 'v0.24.2'; -my $copyright_dates = "2010-23"; +my $copyright_dates = "2010-24"; my $start_run = time(); my $command_line = join (" ",@ARGV); @@ -1416,15 +1416,18 @@ sub merge_individual_BAM_files{ if ($cram){ $merged_name =~ s/bam$/cram/; warn "At this stage we write out a single CRAM file and delete all temporary BAM files\n"; - warn "Now merging BAM files @$tempbam into >>> $merged_name <<<\n"; + warn "Now merging BAM files @$tempbam into >>> $merged_name <<< (using --threads 4)\n"; $final_output_filename = "${output_dir}${merged_name}"; - - open (OUT,"| $samtools_path view -h -C -T $cram_ref 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to CRAM file $merged_name: $!\nPlease note that this option requires Samtools version 1.2 or higher!\n\n"; + # 01 Dec 2024: MilosCRF conducted some tests and determined that merging BAM files is faster when --threads 4 is used + # see more here: 
https://github.com/FelixKrueger/Bismark/issues/707 + open (OUT,"| $samtools_path view --threads 4 -h -C -T $cram_ref 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to CRAM file $merged_name: $!\nPlease note that this option requires Samtools version 1.2 or higher!\n\n"; } else{ $final_output_filename = "${output_dir}${merged_name}"; - warn "Now merging BAM files @$tempbam into >>> $merged_name <<<\n"; - open (OUT,"| $samtools_path view -bSh 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to $merged_name: $!\n"; + warn "Now merging BAM files @$tempbam into >>> $merged_name <<< (using --threads 4)\n"; + # 01 Dec 2024: MilosCRF conducted some tests and determined that merging BAM files is faster when --threads 4 is used + # see more here: https://github.com/FelixKrueger/Bismark/issues/707 + open (OUT,"| $samtools_path view --threads 4 -bSh 2>/dev/null - > ${output_dir}${merged_name}") or die "Failed to write to $merged_name: $!\n"; } my $first = 0; @@ -9994,6 +9997,6 @@ Bismark BAM/SAM OUTPUT (default): Each read of paired-end alignments is written out in a separate line in the above format. -Last modified on 23 August 2023 +Last modified on 01 Dezember 2024 HOW_TO }