Skip to content

Commit

Permalink
Stop using generic mapping cutter and just move insertions
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed Jun 28, 2024
1 parent 5d15bed commit 0555273
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 26 deletions.
63 changes: 41 additions & 22 deletions src/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1286,16 +1286,20 @@ Path concat_paths(const Path& path1, const Path& path2) {
return simplify(res);
}

#define debug_simplify
Path simplify(const Path& p, bool trim_internal_deletions) {
Path s;
s.set_name(p.name());
//cerr << "simplifying " << pb2json(p) << endl;
#ifdef debug_simplify
cerr << "simplifying " << pb2json(p) << endl;
#endif
// loop over the mappings in the path, doing a few things
// exclude mappings that are total deletions
// when possible, merge a mapping with the previous mapping
// push inserted sequences to the left
for (size_t i = 0; i < p.mapping_size(); ++i) {
auto m = simplify(p.mapping(i), trim_internal_deletions);
std::cerr << "Simplify mapping " << pb2json(p.mapping(i)) << " to " << pb2json(m) << std::endl;
// remove empty mappings as these are redundant
if (trim_internal_deletions) {
// remove wholly-deleted or empty mappings as these are redundant
Expand All @@ -1306,39 +1310,45 @@ Path simplify(const Path& p, bool trim_internal_deletions) {
if (m.edit_size() == 0) continue;
}
if (s.mapping_size()) {
//&& m.position().is_reverse() == s.mapping(s.mapping_size()-1).position().is_reverse()) {
// if this isn't the first mapping
// refer to the last mapping
Mapping* l = s.mutable_mapping(s.mapping_size()-1);
// split off any insertions from the start
// and push them to the last mapping
size_t ins_at_start = 0;
for (size_t j = 0; j < m.edit_size(); ++j) {
auto& e = m.edit(j);
if (!edit_is_insertion(e)) break;
ins_at_start += e.to_length();

// Move any insertion edits at the start of m to be in l instead.
//
// We don't use cut_mapping() here because it is too powerful and
// also will bring along any adjacent deletions.
size_t edits_moved = 0;
while (edits_moved < m.edit_size() && edit_is_insertion(m.edit(edits_moved))) {
// Copy insertions to the end of l
*l->add_edit() = std::move(*m.mutable_edit(edits_moved));
edits_moved++;
}
// if there are insertions at the start, move them left
if (ins_at_start) {
auto p = cut_mapping(m, ins_at_start);
auto& ins = p.first;
// cerr << "insertion " << pb2json(ins) << endl;
// take the position from the original mapping
m = p.second;
*m.mutable_position() = ins.position();
// cerr << "before and after " << pb2json(ins) << " and " << pb2json(m) << endl;
for (size_t j = 0; j < ins.edit_size(); ++j) {
auto& e = ins.edit(j);
*l->add_edit() = e;
}
// Splice them out of m
m.mutable_edit()->DeleteSubrange(0, edits_moved);

#ifdef debug_simplify
if (edits_moved > 0) {
cerr << "Moved " << edits_moved << "insertion edits left so previous mapping is now " << pb2json(*l) << endl;
}
#endif
// if our last mapping has no position, but we do, merge
if ((!l->has_position() || l->position().node_id() == 0)
&& (m.has_position() && m.position().node_id() != 0)) {

#ifdef debug_simplify
std::cerr << "Push position to previous mapping" << std::endl;
#endif

*l->mutable_position() = m.position();
// if our last mapping has a position, and we don't, merge
} else if ((!m.has_position() || m.position().node_id() == 0)
&& (l->has_position() && l->position().node_id() != 0)) {

#ifdef debug_simplify
std::cerr << "Get position from previous mapping" << std::endl;
#endif

*m.mutable_position() = *l->mutable_position();
m.mutable_position()->set_offset(from_length(*l));
}
Expand All @@ -1350,10 +1360,19 @@ Path simplify(const Path& p, bool trim_internal_deletions) {
&& l->position().node_id() == m.position().node_id()
&& l->position().offset() + mapping_from_length(*l) == m.position().offset())) {
// we can merge the current mapping onto the old one

#ifdef debug_simplify
std::cerr << "Combine with previous mapping" << std::endl;
#endif

*l = concat_mappings(*l, m, trim_internal_deletions);
} else {
if (from_length(m) || to_length(m)) {
*s.add_mapping() = m;
} else {
#ifdef debug_simplify
std::cerr << "Drop empty mapping" << std::endl;
#endif
}
}
} else {
Expand Down
16 changes: 13 additions & 3 deletions src/path.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -289,8 +289,15 @@ void reverse_complement_path_in_place(Path* path,
const function<int64_t(id_t)>& node_length);
/// Simplify the path for addition as new material in the graph. Remove any
/// mappings that are merely single deletions, merge adjacent edits of the same
/// type, strip leading and trailing deletion edits on mappings, and make sure no
/// mappings have missing positions.
/// type, strip leading and trailing deletion edits on mappings (adjusting
/// positions), and make sure no mappings have missing positions.
///
/// Note that this removes deletions at the start and end of Mappings, so code
/// that handles simplified Alignments needs to handle offsets on internal
/// Mappings.
///
/// If trim_internal_deletions is false, refrains from creating internal skips
/// of deleted sequence.
Path simplify(const Path& p, bool trim_internal_deletions = true);
/// Merge adjacent edits of the same type, strip leading and trailing deletion
/// edits (while updating positions if necessary), and makes sure position is
Expand Down Expand Up @@ -320,7 +327,10 @@ pair<mapping_t, mapping_t> cut_mapping(const mapping_t& m, const Position& pos);
// divide mapping at reference-relative offset (as measure in from_length)
pair<Mapping, Mapping> cut_mapping_offset(const Mapping& m, size_t offset);
pair<mapping_t, mapping_t> cut_mapping_offset(const mapping_t& m, size_t offset);
// divide mapping at target-relative offset (as measured in to_length)
/// Divide mapping at target-relative offset (as measured in to_length).
///
/// Deletions at the cut point (which are 0 target-relative bases long) always
/// end up in the first piece.
pair<Mapping, Mapping> cut_mapping(const Mapping& m, size_t offset);
pair<mapping_t, mapping_t> cut_mapping(const mapping_t& m, size_t offset);
// divide path at reference-relative position
Expand Down
3 changes: 2 additions & 1 deletion src/unittest/path.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ TEST_CASE("Path simplification tolerates adjacent insertions and deletions", "[p
Path path;
json2pb(path, path_string.c_str(), path_string.size());

auto simple = simplify(path);
// Simplify without replacing deletions with skips
auto simple = simplify(path, false);

std::cerr << pb2json(simple) << std::endl;

Expand Down

0 comments on commit 0555273

Please sign in to comment.