Skip to content
This repository has been archived by the owner on Jul 17, 2023. It is now read-only.

Commit

Permalink
MANTA-108 remove no longer used soft-clip logic
Browse files Browse the repository at this point in the history
  • Loading branch information
ctsa committed Dec 6, 2013
1 parent 5288344 commit 18ed0b3
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 167 deletions.
33 changes: 3 additions & 30 deletions src/c++/lib/manta/SVLocusAssembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ getBreakendReads(
static const unsigned MAX_NUM_READS(1000);

#ifdef DEBUG_ASBL
unsigned clipCount(0);
unsigned indelCount(0);
unsigned semiAlignedCount(0);
unsigned shadowCount(0);
Expand Down Expand Up @@ -221,28 +220,6 @@ getBreakendReads(

SimpleAlignment bamAlign(bamRead);

/// check whether we keep this read because of soft clipping:
bool isClipKeeper(false);
//// these cases should be handled by soft-clip now:
#if 0
{
static const unsigned minSoftClipLen(4);

unsigned leadingClipLen(0);
unsigned trailingClipLen(0);
getSVBreakendCandidateClip(bamRead, bamAlign.path, leadingClipLen, trailingClipLen);

if (isSearchForRightOpen)
{
if (trailingClipLen >= minSoftClipLen) isClipKeeper = true;
}

if (isSearchForLeftOpen)
{
if (leadingClipLen >= minSoftClipLen) isClipKeeper = true;
}
}
#endif

/// check for any indels in read:
bool isIndelKeeper(false);
Expand All @@ -258,7 +235,7 @@ getBreakendReads(
}
}


/// this test covered semi-aligned and soft-clip together
bool isSemiAlignedKeeper(false);
{
static const unsigned minMismatchLen(4);
Expand Down Expand Up @@ -303,14 +280,12 @@ getBreakendReads(
lastQname = bamRead.qname();
isLastSet = true;

if (! (isClipKeeper
|| isIndelKeeper
if (! (isIndelKeeper
|| isSemiAlignedKeeper
|| isShadowKeeper
)) continue;

#ifdef DEBUG_ASBL
if (isClipKeeper) ++clipCount;
if (isIndelKeeper) ++indelCount;
if (isSemiAlignedKeeper) ++semiAlignedCount;
if (isShadowKeeper) ++shadowCount;
Expand All @@ -323,8 +298,7 @@ getBreakendReads(
log_os << logtag << "Adding bamrec: " << bamRead << '\n'
<< "\tmapq: " << bamRead.pe_map_qual() << '\n'
<< "\tread: " << bamRead.get_bam_read() << '\n';
log_os << "isClipKeeper: " << isClipKeeper
<< " isIndelKeeper: " << isIndelKeeper
log_os << "isIndelKeeper: " << isIndelKeeper
<< " isSemiAlignedKeeper: " << isSemiAlignedKeeper
<< " isShadowKeeper: " << isShadowKeeper
<< '\n';
Expand All @@ -346,7 +320,6 @@ getBreakendReads(

#ifdef DEBUG_ASBL
log_os << logtag << "bam " << bamIndex
<< " clip: " << clipCount
<< " indel: " << indelCount
<< " semi-aligned " << semiAlignedCount
<< " shadow " << shadowCount
Expand Down
108 changes: 2 additions & 106 deletions src/c++/lib/manta/SVLocusScanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,54 +396,6 @@ getSVCandidatesFromReadIndels(



/// Test whether enough of a run of base qualities reaches the minimum quality.
///
/// \param[in] clipQual pointer to the first base quality of the run
/// \param[in] clipSize number of base qualities in the run, must be non-zero
/// \param[in] minQ base qualities of at least this value are counted
/// \param[in] minQFrac required fraction of counted base qualities
///
/// \return true if the counted fraction is at least minQFrac
static
bool
isClipQualitySufficient(
    const uint8_t* clipQual,
    const unsigned clipSize,
    const uint8_t minQ,
    const float minQFrac)
{
    unsigned passCount(0);
    const uint8_t* const clipQualEnd(clipQual + clipSize);
    for (; clipQual != clipQualEnd; ++clipQual)
    {
        if (*clipQual >= minQ) ++passCount;
    }
    return ((static_cast<float>(passCount)/clipSize) >= minQFrac);
}



/// Report the soft-clip length found on each end of a read.
///
/// Both output lengths are first reset to zero. The soft-clip size of an end is
/// then copied to its output only if that size is non-zero and at least minQFrac
/// of the clipped bases have a base quality of minQ or higher.
///
/// \param[in] bamRead source of the base qualities and the read size
/// \param[in] apath alignment path queried for the leading/trailing soft-clip sizes
/// \param[out] leadingClipLen accepted soft-clip length at the start of the read, else 0
/// \param[out] trailingClipLen accepted soft-clip length at the end of the read, else 0
/// \param[in] minQ base qualities of at least this value count towards minQFrac
/// \param[in] minQFrac required fraction of clipped bases with quality >= minQ
void
getSVBreakendCandidateClip(
    const bam_record& bamRead,
    const ALIGNPATH::path_t& apath,
    unsigned& leadingClipLen,
    unsigned& trailingClipLen,
    const uint8_t minQ,
    const float minQFrac)
{
    leadingClipLen = 0;
    trailingClipLen = 0;

    const uint8_t* const baseQual(bamRead.qual());
    const unsigned readLength(bamRead.read_size());

    // trailing clip covers the final trailSize base qualities of the read:
    const unsigned trailSize(apath_soft_clip_trail_size(apath));
    if ((0 != trailSize) &&
        isClipQualitySufficient(baseQual + (readLength - trailSize), trailSize, minQ, minQFrac))
    {
        trailingClipLen = trailSize;
    }

    // leading clip covers the first leadSize base qualities of the read:
    const unsigned leadSize(apath_soft_clip_lead_size(apath));
    if ((0 != leadSize) &&
        isClipQualitySufficient(baseQual, leadSize, minQ, minQFrac))
    {
        leadingClipLen = leadSize;
    }
}



bool
isGoodShadow(const bam_record& bamRead,
const uint8_t lastMapq,
Expand Down Expand Up @@ -491,40 +443,6 @@ isGoodShadow(const bam_record& bamRead,
}


#if 0
/// get SV candidates from read clipping
static
void
getSVCandidatesFromReadClip(
const ReadScannerOptions& opt,
const bam_record& bamRead,
const SimpleAlignment& bamAlign,
TrackedCandidates& candidates)
{
using namespace SVEvidenceType;
static const index_t svSource(SOFTCLIP);

unsigned leadingClipLen(0), trailingClipLen(0);
getSVBreakendCandidateClip(bamRead, bamAlign.path, leadingClipLen, trailingClipLen);

// soft-clipped reads don't define a full hypothesis, so they're always evidence for a 'complex' ie. undefined, event:
static const bool isComplex(true);

if (leadingClipLen >= opt.minSoftClipLen)
{
const pos_t clipPos(bamAlign.pos);
candidates.push_back(GetSplitSVCandidate(opt,bamRead.target_id(),clipPos,clipPos, svSource, isComplex));
}

if (trailingClipLen >= opt.minSoftClipLen)
{
const pos_t clipPos(bamAlign.pos + apath_ref_length(bamAlign.path));
candidates.push_back(GetSplitSVCandidate(opt,bamRead.target_id(),clipPos,clipPos, svSource, isComplex));
}
}
#endif



static
void
Expand Down Expand Up @@ -818,15 +736,7 @@ getSingleReadSVCandidates(
log_os << logtag << " post-indels candidate_size: " << candidates.size() << "\n";
#endif

// - process soft-clip in the localRead:
//
// NOTE : theoretically the semi-aligned function will now pick up all soft-clip cases
//
// getSVCandidatesFromReadClip(opt, localRead, localAlign, candidates);
#ifdef DEBUG_SCANNER
log_os << logtag << " post-clip candidate_size: " << candidates.size() << "\n";
#endif

// this detects semi-aligned AND soft-clip now:
getSVCandidatesFromSemiAligned(opt, localRead, localAlign, refSeq,
candidates);
#ifdef DEBUG_SCANNER
Expand Down Expand Up @@ -1196,21 +1106,7 @@ isLocalAssemblyEvidence(
}

//
// soft-clipping:
//
// NOTE these cases should be caught by the semi-aligned function now:
//
// {
// unsigned leadingClipLen(0), trailingClipLen(0);
// getSVBreakendCandidateClip(bamRead, bamAlign.path, leadingClipLen, trailingClipLen);
// if ((leadingClipLen >= _opt.minSoftClipLen) || (trailingClipLen >= _opt.minSoftClipLen))
// {
// return true;
// }
// }

//
// semi-aligned read ends:
// semi-aligned AND soft-clipped read ends:
//
{
unsigned leadingMismatchLen(0), trailingMismatchLen(0);
Expand Down
14 changes: 0 additions & 14 deletions src/c++/lib/manta/SVLocusScanner.hh
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,6 @@ struct SVObservationWeights
};


/// check bam record for soft-clipping which is interesting enough to be used as SV evidence:
///
/// The soft-clip size at each end of the read is reported only if it is non-zero and
/// at least minQFrac of the clipped bases have qual>=minQ; otherwise the length
/// for that end is set to zero.
///
/// \param[in] bamRead read supplying the base qualities and read size
/// \param[in] apath alignment path from which the leading/trailing soft-clip sizes are taken
/// \param[out] leadingClipLen accepted soft-clip length at the start of the read, or 0
/// \param[out] trailingClipLen accepted soft-clip length at the end of the read, or 0
/// \param[in] minQ minimum base quality for a clipped base to count towards minQFrac
/// \param[in] minQFrac this fraction of bases must have qual>=minQ within the clipped region
///
void
getSVBreakendCandidateClip(
const bam_record& bamRead,
const ALIGNPATH::path_t& apath,
unsigned& leadingClipLen,
unsigned& trailingClipLen,
const uint8_t minQ = 20,
const float minQFrac = 0.75);


bool
isGoodShadow(
Expand Down
23 changes: 6 additions & 17 deletions src/c++/lib/options/ReadScannerOptions.hh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ struct ReadScannerOptions
splitBreakendSizeFraction(0.1),
maxSplitBreakendSize(100),
minSplitBreakendSize(10),
// minSoftClipLen(8),
minSemiAlignedMismatchLen(8),
// These numbers are based on checking a few dozens reads
// and might need some fine-tuning
Expand All @@ -44,17 +43,10 @@ struct ReadScannerOptions

unsigned minMapq;

/// report breakend regions with x prob regions removed from each edge
float breakendEdgeTrimProb;

/// report a pair as "proper pair" if fragment size is within x prob region removed from each edge
float properPairTrimProb;

/// add a pair to the evidence pool if frag size is within x prob region removed from each edge
float evidenceTrimProb;

/// ignore indels smaller than this when building graph:
unsigned minCandidateVariantSize;
float breakendEdgeTrimProb; ///< report breakend regions with x prob regions removed from each edge
float properPairTrimProb; ///< report a pair as "proper pair" if fragment size is within x prob region removed from each edge
float evidenceTrimProb; ///< add a pair to the evidence pool if frag size is within x prob region removed from each edge
unsigned minCandidateVariantSize; ///< ignore indels smaller than this when building graph:

// whenever a breakend is predicted from a read pair junction, the predicted breakend range should be no
// smaller than this:
Expand All @@ -72,10 +64,7 @@ struct ReadScannerOptions
// smaller than this:
unsigned minSplitBreakendSize;

// Soft clipped read ends must be of at least this length to be entered as small SV evidence
// unsigned minSoftClipLen;

// Semi-aligned regions need to be at least this long to be included as SV evidence
// Semi-aligned regions (including soft-clipped) need to be at least this long to be included as SV evidence
unsigned minSemiAlignedMismatchLen;

// Accept semi-aligned reads with at least this hypothesis score, different for graph and candidate generation
Expand All @@ -87,6 +76,6 @@ struct ReadScannerOptions
unsigned minSingletonMapqCandidates;

bool isIgnoreAnomProperPair; ///< typically set true for RNA-Seq analysis, where proper-pair is used to signal intron-spanning pairs

float maxDepthFactor; ///< the maximum depth at which input reads are considered in graph creation/assembly, etc. (when avg chrom depths are provided)
};

0 comments on commit 18ed0b3

Please sign in to comment.