-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
7,774 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
AllTests | ||
dfinder | ||
*.pyc | ||
*.bam | ||
*.o | ||
*~ | ||
result* | ||
output | ||
CMakeLists.txt.user | ||
build/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#include "BamStatCalculator.h" | ||
#include "error.h" | ||
|
||
#include <numeric> | ||
#include <cmath> | ||
#include <algorithm> | ||
|
||
using namespace std; | ||
using namespace BamTools; | ||
|
||
BamStatCalculator::BamStatCalculator(const string &filename) : | ||
insertMean(-1), insertSd(-1) | ||
{ | ||
if (!reader.Open(filename)) | ||
error("Could not open the input BAM file."); | ||
loadInserts(); | ||
} | ||
|
||
BamStatCalculator::~BamStatCalculator() | ||
{ | ||
reader.Close(); | ||
} | ||
|
||
int BamStatCalculator::getInsertMean() | ||
{ | ||
if (insertMean == -1) { | ||
insertMean = mean(); | ||
} | ||
return insertMean; | ||
} | ||
|
||
int BamStatCalculator::getInsertSd() | ||
{ | ||
if (insertSd == -1) { | ||
insertSd = sd(); | ||
} | ||
return insertSd; | ||
} | ||
|
||
void BamStatCalculator::loadInserts() | ||
{ | ||
BamAlignment al; | ||
size_t cnt = 0; | ||
while (reader.GetNextAlignmentCore(al) && cnt < 10000) | ||
{ | ||
if (al.IsProperPair() && al.MatePosition > al.Position) | ||
{ | ||
uint64_t insert = al.MatePosition + al.Length - al.Position; | ||
if (insert < 10000) { | ||
inserts.push_back(insert); | ||
cnt++; | ||
} | ||
} | ||
} | ||
} | ||
|
||
int BamStatCalculator::mean() | ||
{ | ||
return accumulate(inserts.begin(), inserts.end(), 0) / inserts.size(); | ||
} | ||
|
||
|
||
int BamStatCalculator::sd() | ||
{ | ||
int m = getInsertMean(); | ||
vector<int> temp; | ||
transform(inserts.begin(), inserts.end(), back_inserter(temp), [](int x) { return x*x; }); | ||
uint32_t sum = accumulate(temp.begin(), temp.end(), 0); | ||
return sqrt( sum / temp.size() - m * m); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#ifndef BAMSTATCALCULATOR_H | ||
#define BAMSTATCALCULATOR_H | ||
|
||
#include "api/BamReader.h" | ||
#include <string> | ||
#include <vector> | ||
|
||
class BamStatCalculator | ||
{ | ||
public: | ||
BamStatCalculator(const std::string& filename); | ||
virtual ~BamStatCalculator(); | ||
|
||
int getInsertMean(); | ||
int getInsertSd(); | ||
|
||
private: | ||
void loadInserts(); | ||
int mean(); | ||
int sd(); | ||
|
||
BamTools::BamReader reader; | ||
std::vector<int> inserts; | ||
int insertMean; | ||
int insertSd; | ||
}; | ||
|
||
#endif // BAMSTATCALCULATOR_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
cmake_minimum_required(VERSION 2.8) | ||
|
||
project(sprites) | ||
|
||
include_directories($ENV{BAMTOOLS_HOME}/include $ENV{HTSLIB_HOME}) | ||
#link_directories($ENV{BAMTOOLS_HOME}/lib $ENV{HTSLIB_HOME}) | ||
add_definitions(-std=c++0x) | ||
|
||
add_executable(sprites main.cpp error.cpp Helper.cpp | ||
Deletion.cpp Thirdparty/overlapper.cpp BamStatCalculator.cpp ClipReader.cpp clip.cpp FaidxWrapper.cpp range.cpp) | ||
target_link_libraries(sprites $ENV{HTSLIB_HOME}/libhts.a $ENV{BAMTOOLS_HOME}/lib/libbamtools.a pthread z) | ||
|
||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall") | ||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g -O2 -Wall") | ||
|
||
if(CMAKE_BUILD_TYPE MATCHES DEBUG) | ||
message(${CMAKE_CXX_FLAGS_DEBUG}) | ||
else(CMAKE_BUILD_TYPE MATCHES DEBUG) | ||
message(${CMAKE_CXX_FLAGS_RELEASE}) | ||
endif(CMAKE_BUILD_TYPE MATCHES DEBUG) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#include "ClipReader.h" | ||
#include "error.h" | ||
#include "api/BamAlgorithms.h" | ||
|
||
using namespace std; | ||
using namespace BamTools; | ||
|
||
ClipReader::ClipReader(const string &filename, int allowedNum, int mode, int minMapQual, int isizeCutoff) | ||
: allowedNum(allowedNum), mode(mode), minMapQual(minMapQual), isizeCutoff(isizeCutoff) | ||
{ | ||
if (!reader.Open(filename)) | ||
error("Could not open the input BAM file."); | ||
if (!reader.LocateIndex()) | ||
error("Could not locate the index file"); | ||
} | ||
|
||
ClipReader::~ClipReader() | ||
{ | ||
reader.Close(); | ||
} | ||
|
||
bool ClipReader::setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition) | ||
{ | ||
return reader.SetRegion(leftRefId, leftPosition, rightRefId, rightPosition); | ||
} | ||
|
||
int ClipReader::getReferenceId(const string &referenceName) | ||
{ | ||
return reader.GetReferenceID(referenceName); | ||
} | ||
|
||
string ClipReader::getReferenceName(int referenceId) | ||
{ | ||
assert(referenceId >= 0 && referenceId < reader.GetReferenceCount()); | ||
return reader.GetReferenceData()[referenceId].RefName; | ||
} | ||
|
||
AbstractClip *ClipReader::nextClip() { | ||
BamAlignment al; | ||
while (reader.GetNextAlignment(al)) { | ||
vector<int> clipSizes, readPositions, genomePositions; | ||
// if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; | ||
if (al.MapQuality < minMapQual || !al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; | ||
int size = clipSizes.size(); | ||
|
||
if (al.IsProperPair()) { | ||
if (!al.IsReverseStrand() && al.Position == genomePositions[0] && | ||
clipSizes[0] >= allowedNum && | ||
(size == 1 || | ||
(size == 2 && clipSizes[1] <= 5))) { | ||
return new ForwardBClip(al.RefID, | ||
al.Position + 1, | ||
genomePositions[0] + 1, | ||
al.MatePosition + 1, | ||
al.QueryBases, | ||
al.CigarData); | ||
} | ||
if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] && | ||
clipSizes[size - 1] >= allowedNum && | ||
(size == 1 || | ||
(size == 2 && clipSizes[0] <= 5))) { | ||
return new ReverseEClip(al.RefID, | ||
al.Position + 1, | ||
genomePositions[size - 1] + 1, | ||
al.MatePosition + 1, | ||
al.QueryBases, | ||
al.CigarData); | ||
} | ||
} | ||
|
||
if (inEnhancedMode()) { | ||
if (al.RefID != al.MateRefID || abs(al.InsertSize) <= isizeCutoff) | ||
continue; | ||
if ((al.AlignmentFlag == 161 || al.AlignmentFlag == 97) && al.Position < al.MatePosition && | ||
clipSizes[size - 1] >= allowedNum && | ||
(size == 1 || (size == 2 && clipSizes[0] <= 5))) { | ||
return new ForwardEClip(al.RefID, | ||
al.Position + 1, | ||
genomePositions[size - 1] + 1, | ||
al.MatePosition + 1, | ||
al.QueryBases, | ||
al.CigarData); | ||
} | ||
if ((al.AlignmentFlag == 81 || al.AlignmentFlag == 145) && al.Position > al.MatePosition && | ||
clipSizes[0] >= allowedNum && | ||
(size == 1 || (size == 2 && clipSizes[1] <= 5))) { | ||
return new ReverseBClip(al.RefID, | ||
al.Position + 1, | ||
genomePositions[0] + 1, | ||
al.MatePosition + 1, | ||
al.QueryBases, | ||
al.CigarData); | ||
} | ||
} | ||
|
||
} | ||
return NULL; | ||
} | ||
|
||
bool ClipReader::inEnhancedMode() const | ||
{ | ||
return mode == 1; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
#ifndef CLIPREADER_H | ||
#define CLIPREADER_H | ||
|
||
#include "clip.h" | ||
|
||
class ClipReader | ||
{ | ||
public: | ||
// 0 indicates the standard mode and 1 indicates the enhanced mode, which reads reads of type 2 besides type 1 | ||
ClipReader(const std::string& filename, int allowedNum, int mode, int minMapQual, int isizeCutoff); | ||
virtual ~ClipReader(); | ||
|
||
bool setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition); | ||
|
||
int getReferenceId(const std::string& referenceName); | ||
std::string getReferenceName(int referenceId); | ||
|
||
int getAllowedNum() const; | ||
|
||
AbstractClip* nextClip(); | ||
|
||
private: | ||
BamTools::BamReader reader; | ||
int allowedNum; | ||
int mode; | ||
int minMapQual; | ||
int isizeCutoff; | ||
|
||
bool inEnhancedMode() const; | ||
}; | ||
|
||
#endif // CLIPREADER_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#include "Deletion.h" | ||
#include "Helper.h" | ||
#include <cassert> | ||
#include <sstream> | ||
|
||
using namespace std; | ||
|
||
Deletion::Deletion(const string &referenceName, | ||
int start1, | ||
int end1, | ||
int start2, | ||
int end2, | ||
int length, | ||
const string& fromTag) : | ||
referenceName(referenceName), | ||
start1(start1), | ||
end1(end1), | ||
start2(start2), | ||
end2(end2), | ||
length(length), | ||
fromTag(fromTag) { | ||
assert(checkRep()); | ||
} | ||
|
||
Deletion::~Deletion() { | ||
} | ||
|
||
string Deletion::toBedpe() const { | ||
stringstream fmt; | ||
fmt << referenceName << "\t" << start1 - 1 << "\t" << end1 << "\t" | ||
<< referenceName << "\t" << start2 - 1 << "\t" << end2; | ||
return fmt.str(); | ||
} | ||
|
||
bool Deletion::overlaps(const Deletion &other) const | ||
{ | ||
if (referenceName != other.referenceName) return false; | ||
return ((start1-1 >= other.start1-1 && start1-1 <= other.end1) || | ||
(other.start1-1 >= start1-1 && other.start1-1 <= end1)) && | ||
((start2-1 >= other.start2-1 && start2-1 <= other.end2) || | ||
(other.start2-1 >= start2-1 && other.start2-1 <= end2)); | ||
} | ||
|
||
bool Deletion::operator<(const Deletion &other) const | ||
{ | ||
if (referenceName != other.referenceName) return referenceName < other.referenceName; | ||
if (start1 != other.start1) return start1 < other.start1; | ||
if (start2 != other.start2) return start2 < other.start2; | ||
if (end1 != other.end1) return end1 < other.end1; | ||
return end2 < other.end2; | ||
} | ||
|
||
bool Deletion::operator==(const Deletion &other) const | ||
{ | ||
return referenceName == other.referenceName && | ||
start1 == other.start1 && start2 == other.start2 && | ||
end1 == other.end1 && end2 == other.end2; | ||
} | ||
|
||
std::ostream& operator <<(ostream &stream, const Deletion &del) | ||
{ | ||
stream << del.toBedpe(); | ||
return stream; | ||
} | ||
|
||
bool Deletion::checkRep() const | ||
{ | ||
return (start1 <= end1) && | ||
(start2 <= end2) && | ||
(length <= Helper::SVLEN_THRESHOLD); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#ifndef _DELETION_H_ | ||
#define _DELETION_H_ | ||
|
||
#include <string> | ||
#include <iostream> | ||
|
||
class Deletion { | ||
public: | ||
Deletion(const std::string& referenceName, | ||
int start1, | ||
int end1, | ||
int start2, | ||
int end2, | ||
int length, | ||
const std::string& fromTag); | ||
|
||
virtual ~Deletion(); | ||
|
||
std::string getReferenceName() const { return referenceName; } | ||
|
||
int getStart1() const { return start1; } | ||
|
||
int getEnd1() const { return end1; } | ||
|
||
int getStart2() const { return start2; } | ||
|
||
int getEnd2() const { return end2; } | ||
|
||
int getLength() const { return length; } | ||
|
||
std::string getFromTag() const { return fromTag; } | ||
|
||
std::string toBedpe() const; | ||
|
||
friend std::ostream& operator <<(std::ostream& stream, const Deletion& del); | ||
|
||
bool overlaps(const Deletion &other) const; | ||
bool operator<(const Deletion &other) const; | ||
bool operator==(const Deletion &other) const; | ||
|
||
private: | ||
std::string referenceName; | ||
int start1; | ||
int end1; | ||
int start2; | ||
int end2; | ||
int length; | ||
std::string fromTag; | ||
|
||
bool checkRep() const; | ||
|
||
}; | ||
|
||
#endif /* _DELETION_H_ */ |
Oops, something went wrong.