Skip to content

Commit

Permalink
Port of sprites 0.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangzhen committed Oct 22, 2016
1 parent fa4f1a5 commit 5dff421
Show file tree
Hide file tree
Showing 26 changed files with 7,774 additions and 0 deletions.
10 changes: 10 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
AllTests
dfinder
*.pyc
*.bam
*.o
*~
result*
output
CMakeLists.txt.user
build/
70 changes: 70 additions & 0 deletions BamStatCalculator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "BamStatCalculator.h"
#include "error.h"

#include <numeric>
#include <cmath>
#include <algorithm>

using namespace std;
using namespace BamTools;

BamStatCalculator::BamStatCalculator(const string &filename) :
insertMean(-1), insertSd(-1)
{
if (!reader.Open(filename))
error("Could not open the input BAM file.");
loadInserts();
}

BamStatCalculator::~BamStatCalculator()
{
reader.Close();
}

int BamStatCalculator::getInsertMean()
{
if (insertMean == -1) {
insertMean = mean();
}
return insertMean;
}

int BamStatCalculator::getInsertSd()
{
if (insertSd == -1) {
insertSd = sd();
}
return insertSd;
}

void BamStatCalculator::loadInserts()
{
BamAlignment al;
size_t cnt = 0;
while (reader.GetNextAlignmentCore(al) && cnt < 10000)
{
if (al.IsProperPair() && al.MatePosition > al.Position)
{
uint64_t insert = al.MatePosition + al.Length - al.Position;
if (insert < 10000) {
inserts.push_back(insert);
cnt++;
}
}
}
}

int BamStatCalculator::mean()
{
return accumulate(inserts.begin(), inserts.end(), 0) / inserts.size();
}


int BamStatCalculator::sd()
{
int m = getInsertMean();
vector<int> temp;
transform(inserts.begin(), inserts.end(), back_inserter(temp), [](int x) { return x*x; });
uint32_t sum = accumulate(temp.begin(), temp.end(), 0);
return sqrt( sum / temp.size() - m * m);
}
28 changes: 28 additions & 0 deletions BamStatCalculator.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef BAMSTATCALCULATOR_H
#define BAMSTATCALCULATOR_H

#include "api/BamReader.h"
#include <string>
#include <vector>

class BamStatCalculator
{
public:
BamStatCalculator(const std::string& filename);
virtual ~BamStatCalculator();

int getInsertMean();
int getInsertSd();

private:
void loadInserts();
int mean();
int sd();

BamTools::BamReader reader;
std::vector<int> inserts;
int insertMean;
int insertSd;
};

#endif // BAMSTATCALCULATOR_H
21 changes: 21 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 2.8)

project(sprites)

include_directories($ENV{BAMTOOLS_HOME}/include $ENV{HTSLIB_HOME})
#link_directories($ENV{BAMTOOLS_HOME}/lib $ENV{HTSLIB_HOME})
add_definitions(-std=c++0x)

add_executable(sprites main.cpp error.cpp Helper.cpp
Deletion.cpp Thirdparty/overlapper.cpp BamStatCalculator.cpp ClipReader.cpp clip.cpp FaidxWrapper.cpp range.cpp)
target_link_libraries(sprites $ENV{HTSLIB_HOME}/libhts.a $ENV{BAMTOOLS_HOME}/lib/libbamtools.a pthread z)

set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g -O2 -Wall")

if(CMAKE_BUILD_TYPE MATCHES DEBUG)
message(${CMAKE_CXX_FLAGS_DEBUG})
else(CMAKE_BUILD_TYPE MATCHES DEBUG)
message(${CMAKE_CXX_FLAGS_RELEASE})
endif(CMAKE_BUILD_TYPE MATCHES DEBUG)

103 changes: 103 additions & 0 deletions ClipReader.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#include "ClipReader.h"
#include "error.h"
#include "api/BamAlgorithms.h"

using namespace std;
using namespace BamTools;

ClipReader::ClipReader(const string &filename, int allowedNum, int mode, int minMapQual, int isizeCutoff)
: allowedNum(allowedNum), mode(mode), minMapQual(minMapQual), isizeCutoff(isizeCutoff)
{
if (!reader.Open(filename))
error("Could not open the input BAM file.");
if (!reader.LocateIndex())
error("Could not locate the index file");
}

ClipReader::~ClipReader()
{
reader.Close();
}

bool ClipReader::setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition)
{
return reader.SetRegion(leftRefId, leftPosition, rightRefId, rightPosition);
}

int ClipReader::getReferenceId(const string &referenceName)
{
return reader.GetReferenceID(referenceName);
}

string ClipReader::getReferenceName(int referenceId)
{
assert(referenceId >= 0 && referenceId < reader.GetReferenceCount());
return reader.GetReferenceData()[referenceId].RefName;
}

AbstractClip *ClipReader::nextClip() {
BamAlignment al;
while (reader.GetNextAlignment(al)) {
vector<int> clipSizes, readPositions, genomePositions;
// if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue;
if (al.MapQuality < minMapQual || !al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue;
int size = clipSizes.size();

if (al.IsProperPair()) {
if (!al.IsReverseStrand() && al.Position == genomePositions[0] &&
clipSizes[0] >= allowedNum &&
(size == 1 ||
(size == 2 && clipSizes[1] <= 5))) {
return new ForwardBClip(al.RefID,
al.Position + 1,
genomePositions[0] + 1,
al.MatePosition + 1,
al.QueryBases,
al.CigarData);
}
if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] &&
clipSizes[size - 1] >= allowedNum &&
(size == 1 ||
(size == 2 && clipSizes[0] <= 5))) {
return new ReverseEClip(al.RefID,
al.Position + 1,
genomePositions[size - 1] + 1,
al.MatePosition + 1,
al.QueryBases,
al.CigarData);
}
}

if (inEnhancedMode()) {
if (al.RefID != al.MateRefID || abs(al.InsertSize) <= isizeCutoff)
continue;
if ((al.AlignmentFlag == 161 || al.AlignmentFlag == 97) && al.Position < al.MatePosition &&
clipSizes[size - 1] >= allowedNum &&
(size == 1 || (size == 2 && clipSizes[0] <= 5))) {
return new ForwardEClip(al.RefID,
al.Position + 1,
genomePositions[size - 1] + 1,
al.MatePosition + 1,
al.QueryBases,
al.CigarData);
}
if ((al.AlignmentFlag == 81 || al.AlignmentFlag == 145) && al.Position > al.MatePosition &&
clipSizes[0] >= allowedNum &&
(size == 1 || (size == 2 && clipSizes[1] <= 5))) {
return new ReverseBClip(al.RefID,
al.Position + 1,
genomePositions[0] + 1,
al.MatePosition + 1,
al.QueryBases,
al.CigarData);
}
}

}
return NULL;
}

bool ClipReader::inEnhancedMode() const
{
return mode == 1;
}
32 changes: 32 additions & 0 deletions ClipReader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#ifndef CLIPREADER_H
#define CLIPREADER_H

#include "clip.h"

class ClipReader
{
public:
// 0 indicates the standard mode and 1 indicates the enhanced mode, which reads reads of type 2 besides type 1
ClipReader(const std::string& filename, int allowedNum, int mode, int minMapQual, int isizeCutoff);
virtual ~ClipReader();

bool setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition);

int getReferenceId(const std::string& referenceName);
std::string getReferenceName(int referenceId);

int getAllowedNum() const;

AbstractClip* nextClip();

private:
BamTools::BamReader reader;
int allowedNum;
int mode;
int minMapQual;
int isizeCutoff;

bool inEnhancedMode() const;
};

#endif // CLIPREADER_H
71 changes: 71 additions & 0 deletions Deletion.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "Deletion.h"
#include "Helper.h"
#include <cassert>
#include <sstream>

using namespace std;

Deletion::Deletion(const string &referenceName,
int start1,
int end1,
int start2,
int end2,
int length,
const string& fromTag) :
referenceName(referenceName),
start1(start1),
end1(end1),
start2(start2),
end2(end2),
length(length),
fromTag(fromTag) {
assert(checkRep());
}

Deletion::~Deletion() {
}

string Deletion::toBedpe() const {
stringstream fmt;
fmt << referenceName << "\t" << start1 - 1 << "\t" << end1 << "\t"
<< referenceName << "\t" << start2 - 1 << "\t" << end2;
return fmt.str();
}

bool Deletion::overlaps(const Deletion &other) const
{
if (referenceName != other.referenceName) return false;
return ((start1-1 >= other.start1-1 && start1-1 <= other.end1) ||
(other.start1-1 >= start1-1 && other.start1-1 <= end1)) &&
((start2-1 >= other.start2-1 && start2-1 <= other.end2) ||
(other.start2-1 >= start2-1 && other.start2-1 <= end2));
}

bool Deletion::operator<(const Deletion &other) const
{
if (referenceName != other.referenceName) return referenceName < other.referenceName;
if (start1 != other.start1) return start1 < other.start1;
if (start2 != other.start2) return start2 < other.start2;
if (end1 != other.end1) return end1 < other.end1;
return end2 < other.end2;
}

bool Deletion::operator==(const Deletion &other) const
{
return referenceName == other.referenceName &&
start1 == other.start1 && start2 == other.start2 &&
end1 == other.end1 && end2 == other.end2;
}

std::ostream& operator <<(ostream &stream, const Deletion &del)
{
stream << del.toBedpe();
return stream;
}

bool Deletion::checkRep() const
{
return (start1 <= end1) &&
(start2 <= end2) &&
(length <= Helper::SVLEN_THRESHOLD);
}
54 changes: 54 additions & 0 deletions Deletion.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#ifndef _DELETION_H_
#define _DELETION_H_

#include <string>
#include <iostream>

class Deletion {
public:
Deletion(const std::string& referenceName,
int start1,
int end1,
int start2,
int end2,
int length,
const std::string& fromTag);

virtual ~Deletion();

std::string getReferenceName() const { return referenceName; }

int getStart1() const { return start1; }

int getEnd1() const { return end1; }

int getStart2() const { return start2; }

int getEnd2() const { return end2; }

int getLength() const { return length; }

std::string getFromTag() const { return fromTag; }

std::string toBedpe() const;

friend std::ostream& operator <<(std::ostream& stream, const Deletion& del);

bool overlaps(const Deletion &other) const;
bool operator<(const Deletion &other) const;
bool operator==(const Deletion &other) const;

private:
std::string referenceName;
int start1;
int end1;
int start2;
int end2;
int length;
std::string fromTag;

bool checkRep() const;

};

#endif /* _DELETION_H_ */
Loading

0 comments on commit 5dff421

Please sign in to comment.