Skip to content

Commit

Permalink
Use pin offset for deduplicating chains and making dotplots
Browse files Browse the repository at this point in the history
  • Loading branch information
adamnovak committed May 22, 2024
1 parent 45cbf80 commit 220e416
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 10 deletions.
7 changes: 7 additions & 0 deletions src/minimizer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,13 @@ class MinimizerMapper : public AlignerClient {
return this->value.offset;
}
}

/// Get the position on the read's sequence that corresponds to the
/// located graph positions. For reverse-strand minimizers this will be
/// at the end of the minimizer's interval in the read.
inline size_t pin_offset() const {
return this->value.offset;
}

/// How many bases are in a window for which a minimizer is chosen?
inline size_t window_size() const {
Expand Down
21 changes: 11 additions & 10 deletions src/minimizer_mapper_from_chains.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,10 +232,10 @@ void MinimizerMapper::dump_debug_dotplot(const std::string& name, const VectorVi
// Contig alone
exp.field(path_name);
}
// Offset on contig
// Offset on contig of the pin point
exp.field(position.first);
// Offset in read
exp.field(minimizers[seed.source].forward_offset());
// Offset in read *of the pin point* (not of the forward-strand start of the minimizer)
exp.field(minimizers[seed.source].pin_offset());
}
}
}
Expand Down Expand Up @@ -850,7 +850,7 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
seed_positions.reserve(extension_seeds.size());
for (auto& seed_index : extension_seeds) {
if (!used_seeds.count(seed_index)) {
seed_positions.push_back(minimizers[seeds.at(seed_index).source].value.offset);
seed_positions.push_back(minimizers[seeds.at(seed_index).source].pin_offset());
}
}

Expand Down Expand Up @@ -888,11 +888,11 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {

// Find the relevant seed range
std::vector<size_t> anchor_seeds;
while (seed_it != extension_seeds.end() && minimizers[seeds.at(*seed_it).source].value.offset < anchor_interval.first) {
while (seed_it != extension_seeds.end() && minimizers[seeds.at(*seed_it).source].pin_offset() < anchor_interval.first) {
// Move seed iterator to inside or past the interval (should really always be already inside).
++seed_it;
}
while (seed_it != extension_seeds.end() && minimizers[seeds.at(*seed_it).source].value.offset < anchor_interval.second) {
while (seed_it != extension_seeds.end() && minimizers[seeds.at(*seed_it).source].pin_offset() < anchor_interval.second) {
// Take all the seeds into the vector of anchor seeds.
auto found = used_seeds.find(*seed_it);
if (found == used_seeds.end()) {
Expand Down Expand Up @@ -1709,7 +1709,8 @@ vector<Alignment> MinimizerMapper::map_from_chains(Alignment& aln) {
}

for (auto& seed_num : chains[processed_num]) {
size_t read_pos = minimizers[seeds.at(seed_num).source].forward_offset();
// Look at the individual pin points and their associated read-node offset
size_t read_pos = minimizers[seeds.at(seed_num).source].pin_offset();
pos_t graph_pos = seeds.at(seed_num).pos;

nid_t node_id = id(graph_pos);
Expand Down Expand Up @@ -3524,7 +3525,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector
// And derive the graph start
graph_start = make_pos_t(id(graph_end), is_rev(graph_end), offset(graph_end) - length);
// And the read start
read_start = source.value.offset + 1 - length;
read_start = source.pin_offset() + 1 - length;
// The seed is actually the last 1bp interval
hint_start = length - 1;
} else {
Expand All @@ -3540,14 +3541,14 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector
// How much do we cut off the end?
margin_right = (size_t)source.length - length;
// And we store the read start position already in the item
read_start = source.value.offset;
read_start = source.pin_offset();
// The seed is actually at the start
hint_start = 0;
}

#ifdef debug
std::cerr << "Minimizer at read " << source.forward_offset() << " length " << source.length
<< " orientation " << source.value.is_reverse << " pinned at " << source.value.offset
<< " orientation " << source.value.is_reverse << " pinned at " << source.pin_offset()
<< " is anchor of length " << length << " matching graph " << graph_start << " and read " << read_start
<< " forward, with hint " << hint_start << " bases later on the read" << std::endl;
#endif
Expand Down

1 comment on commit 220e416

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for branch lr-giraffe. View the full report here.

0 tests passed, 16 tests failed and 0 tests skipped in 504 seconds

Failed tests:

  • test_sim_chr21_snp1kg (34 seconds)
  • test_full_brca2_cactus (32 seconds)
  • test_full_brca2_primary (30 seconds)
  • test_full_brca2_snp1kg (31 seconds)
  • test_map_brca1_cactus (30 seconds)
  • test_map_brca1_primary (31 seconds)
  • test_map_brca1_snp1kg (30 seconds)
  • test_map_brca1_snp1kg_mpmap (31 seconds)
  • test_map_mhc_primary (32 seconds)
  • test_map_mhc_snp1kg (32 seconds)
  • test_sim_mhc_cactus (29 seconds)
  • test_sim_mhc_snp1kg (27 seconds)
  • test_sim_mhc_snp1kg_mpmap (26 seconds)
  • test_call_chr21_snp1kg (30 seconds)
  • test_sim_chr21_snp1kg_trained (32 seconds)
  • test_sim_yeast_cactus (27 seconds)

Please sign in to comment.