From cb2f9dd8ccc1ca5a9f467b9c4e85c84504cd300e Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Tue, 4 Feb 2014 15:16:23 -0800 Subject: [PATCH 1/3] fix typo --- src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp b/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp index 1687019d..0270008a 100644 --- a/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp +++ b/src/c++/lib/applications/GenerateSVCandidates/SVScorerSplit.cpp @@ -194,7 +194,7 @@ getSVSplitReadSupport( SVEvidence& evidence) { // apply the split-read scoring, only when: - // 1) the SV is precise, i.e. has successful somatic contigs; + // 1) the SV is precise, i.e. has successfully aligned contigs; // 2) the values of max depth are reasonable (otherwise, the read map may blow out). (filter is run externally) if (sv.isImprecise()) return; From a75da190e535167549b4027d930092bcefa408dd Mon Sep 17 00:00:00 2001 From: Barnes Date: Wed, 5 Feb 2014 16:59:05 +0000 Subject: [PATCH 2/3] Adding split-read MAPQ=0 filtering: "if (atoi(saDat[4].c_str()) == 0) return;" This can be changed later to be set to 20 as is done for pair MAPQ values. --- src/c++/lib/manta/SVLocusScanner.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c++/lib/manta/SVLocusScanner.cpp b/src/c++/lib/manta/SVLocusScanner.cpp index e4ec2f4d..146f5824 100644 --- a/src/c++/lib/manta/SVLocusScanner.cpp +++ b/src/c++/lib/manta/SVLocusScanner.cpp @@ -285,6 +285,9 @@ getSACandidatesFromRead( assert((saDat.size() == 6) && "Unexpected number of SA tag values"); + // Skip split-mapq = 0 reads to remove lots of false positives + if (atoi(saDat[4].c_str()) == 0) return; + const chromMap_t::const_iterator ci(chromToIndex.find(saDat[0])); assert(ci != chromToIndex.end()); From 14277214ad4433292bf4b26183b01a5ea5996103 Mon Sep 17 00:00:00 2001 From: Chris Saunders Date: Wed, 5 Feb 2014 18:04:54 -0800 Subject: [PATCH 3/3] refine SA MAPQ filtration details --- src/c++/lib/manta/SVLocusScanner.cpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/src/c++/lib/manta/SVLocusScanner.cpp b/src/c++/lib/manta/SVLocusScanner.cpp index 146f5824..ec0204af 100644 --- a/src/c++/lib/manta/SVLocusScanner.cpp +++ b/src/c++/lib/manta/SVLocusScanner.cpp @@ -242,6 +242,7 @@ typedef std::map chromMap_t; static void getSACandidatesFromRead( + const ReadScannerOptions& opt, const ReadScannerDerivOptions& dopt, const bam_record& localRead, const SimpleAlignment& localAlign, @@ -257,19 +258,15 @@ getSACandidatesFromRead( if (NULL == saStr) return; split_string(saStr, ';', saVec); - if (saVec[saVec.size() - 1].empty()) + if ( (! saVec.empty()) && saVec.back().empty()) { saVec.pop_back(); } } - // For now we will only handle a single split alignment - // In the future we will need to sort the SA tags by order on of segments on - // the actual template. - // We may also have to toss conflicting segments that map to two different areas, - // or at least find some way of dealing with them. - //std::cerr << "At: '" << saStr << "', '" << localRead << "'" << std::endl; - //std::cerr << "Size: " << saVec.size() << std::endl; + // Only handle a single split alignment right now. + // In the future we could sort the SA tags by order on the template, possibly + // also removing segments that map to two different areas, if (saVec.size() > 1) return; @@ -285,8 +282,9 @@ getSACandidatesFromRead( assert((saDat.size() == 6) && "Unexpected number of SA tag values"); - // Skip split-mapq = 0 reads to remove lots of false positives - if (atoi(saDat[4].c_str()) == 0) return; + /// filter split reads with low MAPQ: + const unsigned saMapq(illumina::blt_util::parse_unsigned_str(saDat[4])); + if (saMapq < opt.minMapq) continue; const chromMap_t::const_iterator ci(chromToIndex.find(saDat[0])); assert(ci != chromToIndex.end()); @@ -303,13 +301,6 @@ getSACandidatesFromRead( cigar_to_apath(saDat[3].c_str(), remoteAlign.path); - /*std::cerr << "SA Values: " - << ", " << saChr - << ", " << saPos - << ", " << saStrand - << ", " << remotePath << '\n'; - */ - // At this point we don't care about strand candidates.push_back(GetSplitSACandidate(dopt, localAlign, remoteAlign)); } @@ -749,7 +740,7 @@ getSingleReadSVCandidates( #endif /// - process split/SA reads: - getSACandidatesFromRead(dopt, localRead, localAlign, chromToIndex, + getSACandidatesFromRead(opt, dopt, localRead, localAlign, chromToIndex, candidates); #ifdef DEBUG_SCANNER log_os << logtag << " post-split read candidate_size: " << candidates.size() << "\n";