From 74ceb35592278e2798d7a862b75238fd3e215847 Mon Sep 17 00:00:00 2001 From: Felix Krueger Date: Fri, 27 Dec 2024 20:41:17 +0100 Subject: [PATCH] Now works with FastA files and both options. Closes #723. --- CHANGELOG.md | 2 ++ bismark | 45 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06392e3..9971072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ - now using 4 cores for merging multiple BAM files (more details [here](https://github.com/FelixKrueger/Bismark/issues/707) +- fixed a corner case when reads were aligned in FastA mode with `--parallel` in combination with `--ambiguous` and/or `--unmapped` (see [#723](https://github.com/FelixKrueger/Bismark/issues/723)) + ### deduplicate_bismark - Changed the path to Samtools to custom variable ([#609](https://github.com/FelixKrueger/Bismark/issues/609)) diff --git a/bismark b/bismark index ed2b7fa..6d56841 100755 --- a/bismark +++ b/bismark @@ -596,7 +596,7 @@ foreach my $filename (@filenames){ } } if ($all_children_succeeded) { - print "All child process successfully finished."; + warn "All child processes successfully finished."; } else { die "\nTerminating. 
Not all child processes successfully finished."; @@ -743,23 +743,47 @@ foreach my $filename (@filenames){ if ($unmapped){ if ($gzip){ - push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_unmapped_reads_1.fq"; - push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_unmapped_reads_2.fq"; + if ($sequence_file_format eq 'FASTA'){ + push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_unmapped_reads_1.fa"; + push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_unmapped_reads_2.fa"; + } + else{ + push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_unmapped_reads_1.fq"; + push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_unmapped_reads_2.fq"; + } } else{ - push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_unmapped_reads_1.fq"; - push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_unmapped_reads_2.fq"; + if ($sequence_file_format eq 'FASTA'){ + push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_unmapped_reads_1.fa"; + push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_unmapped_reads_2.fa"; + } + else{ + push @temp_unmapped_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_unmapped_reads_1.fq"; + push @temp_unmapped_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_unmapped_reads_2.fq"; + } } } if ($ambiguous){ if ($gzip){ - push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_ambiguous_reads_1.fq"; - push @temp_ambiguous_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_ambiguous_reads_2.fq"; + if ($sequence_file_format eq 'FASTA'){ + push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_ambiguous_reads_1.fa"; + push @temp_ambiguous_2, 
"${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_ambiguous_reads_2.fa"; + } + else{ + push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}.gz_ambiguous_reads_1.fq"; + push @temp_ambiguous_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}.gz_ambiguous_reads_2.fq"; + } } else{ - push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_ambiguous_reads_1.fq"; - push @temp_ambiguous_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_ambiguous_reads_2.fq"; + if ($sequence_file_format eq 'FASTA'){ + push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_ambiguous_reads_1.fa"; + push @temp_ambiguous_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_ambiguous_reads_2.fa"; + } + else{ + push @temp_ambiguous_1, "${output_dir}${add_prefix}${pathless_filename_1}.temp.${_}_ambiguous_reads_1.fq"; + push @temp_ambiguous_2, "${output_dir}${add_prefix}${pathless_filename_2}.temp.${_}_ambiguous_reads_2.fq"; + } } } } @@ -1262,7 +1286,8 @@ sub merge_individual_ambiguous_files{ } foreach my $temp(@$temp_ambiguous){ - $temp =~ s/.*\///; # removing path information + $temp =~ s/.*\///; # removing path information + # warn "These are the temp ambiguous files: $temp\n"; sleep(5); } open (AMBIGUOUS,"| gzip -c - > $output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\n";