Skip to content

Commit

Permalink
Merge pull request #4050 from vgteam/no-end-loops
Browse files Browse the repository at this point in the history
Get rid of apparently looping softclips at the ends of reads
  • Loading branch information
adamnovak committed Aug 14, 2023
2 parents ff7afb4 + 9093d3d commit 59b9195
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 52 deletions.
90 changes: 39 additions & 51 deletions src/minimizer_mapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4034,6 +4034,36 @@ static int32_t flank_penalty(size_t length, const std::vector<pareto_point>& fro
return result;
}

/// A helper function that cna merge softclips in properly when joining up
/// paths, but doesn't need expensive full passes over the paths later.
static inline void add_to_path(Path* target, Path* to_append) {
for (auto& mapping : *to_append->mutable_mapping()) {
// For each mapping to append
if (target->mapping_size() > 0) {
// There is an existing mapping on the path.
// Find that previous mapping.
auto* prev_mapping = target->mutable_mapping(target->mapping_size() - 1);

if (mapping.position().node_id() == prev_mapping->position().node_id() &&
(mapping.position().offset() != 0 || mapping_is_total_insertion(*prev_mapping) || mapping_is_total_insertion(mapping))) {
// Previous mapping is to the same node, and either the new
// mapping doesn't start at 0, or one mapping takes up no
// space on the node (i.e. is a pure insert).
//
// So we want to combine the mappings.
for (auto& edit : *mapping.mutable_edit()) {
// Move over all the edits in this mapping onto the end of that one.
*prev_mapping->add_edit() = std::move(edit);
}

continue;
}
}
// If we don't combine the mappings, we need to just move the whole mapping
*target->add_mapping() = std::move(mapping);
}
};

void MinimizerMapper::find_optimal_tail_alignments(const Alignment& aln, const vector<GaplessExtension>& extended_seeds, LazyRNG& rng, Alignment& best, Alignment& second_best) const {

// This assumes that full-length extensions have the highest scores.
Expand Down Expand Up @@ -4270,64 +4300,22 @@ void MinimizerMapper::find_optimal_tail_alignments(const Alignment& aln, const v
best.set_score(winning_score);
second_best.set_score(second_score);

// Concatenate the paths. We know there must be at least an edit boundary
// Concatenate the paths.

// We know there must be at least an edit boundary
// between each part, because the maximal extension doesn't end in a
// mismatch or indel and eats all matches.
// We also don't need to worry about jumps that skip intervening sequence.
*best.mutable_path() = std::move(winning_left);

for (auto* to_append : {&winning_middle, &winning_right}) {
// For each path to append
for (auto& mapping : *to_append->mutable_mapping()) {
// For each mapping to append

if (mapping.position().offset() != 0 && best.path().mapping_size() > 0) {
// If we have a nonzero offset in our mapping, and we follow
// something, we must be continuing on from a previous mapping to
// the node.
crash_unless(mapping.position().node_id() == best.path().mapping(best.path().mapping_size() - 1).position().node_id());

// Find that previous mapping
auto* prev_mapping = best.mutable_path()->mutable_mapping(best.path().mapping_size() - 1);
for (auto& edit : *mapping.mutable_edit()) {
// Move over all the edits in this mapping onto the end of that one.
*prev_mapping->add_edit() = std::move(edit);
}
} else {
// If we start at offset 0 or there's nothing before us, we need to just move the whole mapping
*best.mutable_path()->add_mapping() = std::move(mapping);
}
}
}
add_to_path(best.mutable_path(), &winning_middle);
add_to_path(best.mutable_path(), &winning_right);
// Compute the identity from the path.
best.set_identity(identity(best.path()));

//Do the same for the second best
*second_best.mutable_path() = std::move(second_left);

for (auto* to_append : {&second_middle, &second_right}) {
// For each path to append
for (auto& mapping : *to_append->mutable_mapping()) {
// For each mapping to append

if (mapping.position().offset() != 0 && second_best.path().mapping_size() > 0) {
// If we have a nonzero offset in our mapping, and we follow
// something, we must be continuing on from a previous mapping to
// the node.
crash_unless(mapping.position().node_id() == second_best.path().mapping(second_best.path().mapping_size() - 1).position().node_id());

// Find that previous mapping
auto* prev_mapping = second_best.mutable_path()->mutable_mapping(second_best.path().mapping_size() - 1);
for (auto& edit : *mapping.mutable_edit()) {
// Move over all the edits in this mapping onto the end of that one.
*prev_mapping->add_edit() = std::move(edit);
}
} else {
// If we start at offset 0 or there's nothing before us, we need to just move the whole mapping
*second_best.mutable_path()->add_mapping() = std::move(mapping);
}
}
}

// Compute the identity from the path.
add_to_path(second_best.mutable_path(), &second_middle);
add_to_path(second_best.mutable_path(), &second_right);
second_best.set_identity(identity(second_best.path()));
}

Expand Down
4 changes: 4 additions & 0 deletions src/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1665,6 +1665,10 @@ bool mapping_is_total_deletion(const Mapping& m) {
return m.edit_size() == 1 && edit_is_deletion(m.edit(0));
}

bool mapping_is_total_insertion(const Mapping& m) {
return m.edit_size() == 1 && edit_is_insertion(m.edit(0));
}

bool mapping_is_simple_match(const Mapping& m) {
return m.edit_size() == 1 && edit_is_match(m.edit(0));
}
Expand Down
1 change: 1 addition & 0 deletions src/path.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ int from_length(const Mapping& m);
bool mappings_equivalent(const Mapping& m1, const Mapping& m2);
bool mapping_ends_in_deletion(const Mapping& m);
bool mapping_starts_in_deletion(const Mapping& m);
bool mapping_is_total_insertion(const Mapping& m);
bool mapping_is_total_deletion(const Mapping& m);
bool mapping_is_simple_match(const Mapping& m);
bool path_is_simple_match(const Path& p);
Expand Down
12 changes: 11 additions & 1 deletion test/t/50_vg_giraffe.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 49
plan tests 50

vg construct -a -r small/x.fa -v small/x.vcf.gz >x.vg
vg index -x x.xg x.vg
Expand Down Expand Up @@ -58,6 +58,16 @@ chmod 400 x.dist
vg giraffe -x x.xg -H x.gbwt -m x.min -d x.dist -f reads/small.middle.ref.fq >/dev/null
is "${?}" "0" "a read can be mapped when the distance index is not writable"

echo "@read" >read.fq
echo "GATTACATTAGGAGATAGCCATACGACGTAGCATCTAGCTCAGCCACA$(cat small/x.fa | head -n2 | tail -n1)" >>read.fq
echo "+" >>read.fq
echo "GATTACATTAGGAGATAGCCATACGACGTAGCATCTAGCTCAGCCACA$(cat small/x.fa | head -n2 | tail -n1)" | tr 'ACGTN' '(((((' >>read.fq

vg giraffe -x x.xg -H x.gbwt -m x.min -d x.dist -f read.fq > read.gam
LOOP_LINES="$(vg view -aj read.gam | jq -c 'select(.path.mapping[0].position.node_id == .path.mapping[1].position.node_id)' | wc -l)"
is "${LOOP_LINES}" "0" "a read which softclips does not appear to loop"

rm -f read.fq read.gam
rm -f x.vg x.xg x.gbwt x.min x.sync x.dist x.gg
rm -f x.giraffe.gbz

Expand Down

1 comment on commit 59b9195

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 22594 seconds

Please sign in to comment.