From 5dff42155339413c6237cddd0f0ea6e1ec531506 Mon Sep 17 00:00:00 2001 From: Zhen Zhang Date: Sat, 22 Oct 2016 08:33:27 +0000 Subject: [PATCH] Port of sprites 0.3.0 --- .gitignore | 10 + BamStatCalculator.cpp | 70 + BamStatCalculator.h | 28 + CMakeLists.txt | 21 + ClipReader.cpp | 103 + ClipReader.h | 32 + Deletion.cpp | 71 + Deletion.h | 54 + FaidxWrapper.cpp | 31 + FaidxWrapper.h | 19 + Helper.cpp | 56 + Helper.h | 65 + README.md | 50 + SoftClipReader.cpp | 105 + SoftClipReader.h | 31 + Thirdparty/Timer.h | 56 + Thirdparty/overlapper.cpp | 1417 +++++++++++++ Thirdparty/overlapper.h | 196 ++ clip.cpp | 620 ++++++ clip.h | 191 ++ easylogging++.h | 4003 +++++++++++++++++++++++++++++++++++++ error.cpp | 19 + error.h | 57 + main.cpp | 350 ++++ range.cpp | 89 + range.h | 30 + 26 files changed, 7774 insertions(+) create mode 100644 .gitignore create mode 100644 BamStatCalculator.cpp create mode 100644 BamStatCalculator.h create mode 100644 CMakeLists.txt create mode 100644 ClipReader.cpp create mode 100644 ClipReader.h create mode 100644 Deletion.cpp create mode 100644 Deletion.h create mode 100644 FaidxWrapper.cpp create mode 100644 FaidxWrapper.h create mode 100644 Helper.cpp create mode 100644 Helper.h create mode 100644 README.md create mode 100644 SoftClipReader.cpp create mode 100644 SoftClipReader.h create mode 100644 Thirdparty/Timer.h create mode 100644 Thirdparty/overlapper.cpp create mode 100644 Thirdparty/overlapper.h create mode 100644 clip.cpp create mode 100644 clip.h create mode 100644 easylogging++.h create mode 100644 error.cpp create mode 100644 error.h create mode 100644 main.cpp create mode 100644 range.cpp create mode 100644 range.h diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..49347c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +AllTests +dfinder +*.pyc +*.bam +*.o +*~ +result* +output +CMakeLists.txt.user +build/ diff --git a/BamStatCalculator.cpp b/BamStatCalculator.cpp new file mode 100644 index 0000000..0e014e1 
--- /dev/null +++ b/BamStatCalculator.cpp @@ -0,0 +1,70 @@ +#include "BamStatCalculator.h" +#include "error.h" + +#include +#include +#include + +using namespace std; +using namespace BamTools; + +BamStatCalculator::BamStatCalculator(const string &filename) : + insertMean(-1), insertSd(-1) +{ + if (!reader.Open(filename)) + error("Could not open the input BAM file."); + loadInserts(); +} + +BamStatCalculator::~BamStatCalculator() +{ + reader.Close(); +} + +int BamStatCalculator::getInsertMean() +{ + if (insertMean == -1) { + insertMean = mean(); + } + return insertMean; +} + +int BamStatCalculator::getInsertSd() +{ + if (insertSd == -1) { + insertSd = sd(); + } + return insertSd; +} + +void BamStatCalculator::loadInserts() +{ + BamAlignment al; + size_t cnt = 0; + while (reader.GetNextAlignmentCore(al) && cnt < 10000) + { + if (al.IsProperPair() && al.MatePosition > al.Position) + { + uint64_t insert = al.MatePosition + al.Length - al.Position; + if (insert < 10000) { + inserts.push_back(insert); + cnt++; + } + } + } +} + +int BamStatCalculator::mean() +{ + return inserts.empty() ? 0 : accumulate(inserts.begin(), inserts.end(), 0) / inserts.size(); // no sampled inserts: avoid dividing by zero +} + + +int BamStatCalculator::sd() +{ + int m = getInsertMean(); + vector temp; + transform(inserts.begin(), inserts.end(), back_inserter(temp), [](int x) { return x*x; }); + uint64_t sum = accumulate(temp.begin(), temp.end(), uint64_t(0)); // 64-bit accumulator: up to 10000 squares of values < 10000 do not fit in 32 bits + return temp.empty() ? 0 : sqrt( sum / temp.size() - m * m); // no sampled inserts: avoid dividing by zero +} diff --git a/BamStatCalculator.h b/BamStatCalculator.h new file mode 100644 index 0000000..dbc40e7 --- /dev/null +++ b/BamStatCalculator.h @@ -0,0 +1,28 @@ +#ifndef BAMSTATCALCULATOR_H +#define BAMSTATCALCULATOR_H + +#include "api/BamReader.h" +#include +#include + +class BamStatCalculator +{ +public: + BamStatCalculator(const std::string& filename); + virtual ~BamStatCalculator(); + + int getInsertMean(); + int getInsertSd(); + +private: + void loadInserts(); + int mean(); + int sd(); + + BamTools::BamReader reader; + std::vector inserts; + int insertMean; + int 
insertSd; +}; + +#endif // BAMSTATCALCULATOR_H diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..9653cb9 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 2.8) + +project(sprites) + +include_directories($ENV{BAMTOOLS_HOME}/include $ENV{HTSLIB_HOME}) +#link_directories($ENV{BAMTOOLS_HOME}/lib $ENV{HTSLIB_HOME}) +add_definitions(-std=c++0x) + +add_executable(sprites main.cpp error.cpp Helper.cpp +Deletion.cpp Thirdparty/overlapper.cpp BamStatCalculator.cpp ClipReader.cpp clip.cpp FaidxWrapper.cpp range.cpp) +target_link_libraries(sprites $ENV{HTSLIB_HOME}/libhts.a $ENV{BAMTOOLS_HOME}/lib/libbamtools.a pthread z) + +set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall") +set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g -O2 -Wall") + +if(CMAKE_BUILD_TYPE MATCHES DEBUG) + message(${CMAKE_CXX_FLAGS_DEBUG}) +else(CMAKE_BUILD_TYPE MATCHES DEBUG) + message(${CMAKE_CXX_FLAGS_RELEASE}) +endif(CMAKE_BUILD_TYPE MATCHES DEBUG) + diff --git a/ClipReader.cpp b/ClipReader.cpp new file mode 100644 index 0000000..210a317 --- /dev/null +++ b/ClipReader.cpp @@ -0,0 +1,103 @@ +#include "ClipReader.h" +#include "error.h" +#include "api/BamAlgorithms.h" + +using namespace std; +using namespace BamTools; + +ClipReader::ClipReader(const string &filename, int allowedNum, int mode, int minMapQual, int isizeCutoff) + : allowedNum(allowedNum), mode(mode), minMapQual(minMapQual), isizeCutoff(isizeCutoff) +{ + if (!reader.Open(filename)) + error("Could not open the input BAM file."); + if (!reader.LocateIndex()) + error("Could not locate the index file"); +} + +ClipReader::~ClipReader() +{ + reader.Close(); +} + +bool ClipReader::setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition) +{ + return reader.SetRegion(leftRefId, leftPosition, rightRefId, rightPosition); +} + +int ClipReader::getReferenceId(const string &referenceName) +{ + return reader.GetReferenceID(referenceName); +} + +string 
ClipReader::getReferenceName(int referenceId) +{ + assert(referenceId >= 0 && referenceId < reader.GetReferenceCount()); + return reader.GetReferenceData()[referenceId].RefName; +} + +AbstractClip *ClipReader::nextClip() { + BamAlignment al; + while (reader.GetNextAlignment(al)) { + vector clipSizes, readPositions, genomePositions; +// if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + if (al.MapQuality < minMapQual || !al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + int size = clipSizes.size(); + + if (al.IsProperPair()) { + if (!al.IsReverseStrand() && al.Position == genomePositions[0] && + clipSizes[0] >= allowedNum && + (size == 1 || + (size == 2 && clipSizes[1] <= 5))) { + return new ForwardBClip(al.RefID, + al.Position + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] && + clipSizes[size - 1] >= allowedNum && + (size == 1 || + (size == 2 && clipSizes[0] <= 5))) { + return new ReverseEClip(al.RefID, + al.Position + 1, + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + } + + if (inEnhancedMode()) { + if (al.RefID != al.MateRefID || abs(al.InsertSize) <= isizeCutoff) + continue; + if ((al.AlignmentFlag == 161 || al.AlignmentFlag == 97) && al.Position < al.MatePosition && + clipSizes[size - 1] >= allowedNum && + (size == 1 || (size == 2 && clipSizes[0] <= 5))) { + return new ForwardEClip(al.RefID, + al.Position + 1, + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + if ((al.AlignmentFlag == 81 || al.AlignmentFlag == 145) && al.Position > al.MatePosition && + clipSizes[0] >= allowedNum && + (size == 1 || (size == 2 && clipSizes[1] <= 5))) { + return new ReverseBClip(al.RefID, + al.Position + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + } + + } + return NULL; +} + +bool 
ClipReader::inEnhancedMode() const +{ + return mode == 1; +} diff --git a/ClipReader.h b/ClipReader.h new file mode 100644 index 0000000..c22fed6 --- /dev/null +++ b/ClipReader.h @@ -0,0 +1,32 @@ +#ifndef CLIPREADER_H +#define CLIPREADER_H + +#include "clip.h" + +class ClipReader +{ +public: + // 0 indicates the standard mode and 1 indicates the enhanced mode, which reads reads of type 2 besides type 1 + ClipReader(const std::string& filename, int allowedNum, int mode, int minMapQual, int isizeCutoff); + virtual ~ClipReader(); + + bool setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition); + + int getReferenceId(const std::string& referenceName); + std::string getReferenceName(int referenceId); + + int getAllowedNum() const; + + AbstractClip* nextClip(); + +private: + BamTools::BamReader reader; + int allowedNum; + int mode; + int minMapQual; + int isizeCutoff; + + bool inEnhancedMode() const; +}; + +#endif // CLIPREADER_H diff --git a/Deletion.cpp b/Deletion.cpp new file mode 100644 index 0000000..4b20325 --- /dev/null +++ b/Deletion.cpp @@ -0,0 +1,71 @@ +#include "Deletion.h" +#include "Helper.h" +#include +#include + +using namespace std; + +Deletion::Deletion(const string &referenceName, + int start1, + int end1, + int start2, + int end2, + int length, + const string& fromTag) : + referenceName(referenceName), + start1(start1), + end1(end1), + start2(start2), + end2(end2), + length(length), + fromTag(fromTag) { + assert(checkRep()); +} + +Deletion::~Deletion() { +} + +string Deletion::toBedpe() const { + stringstream fmt; + fmt << referenceName << "\t" << start1 - 1 << "\t" << end1 << "\t" + << referenceName << "\t" << start2 - 1 << "\t" << end2; + return fmt.str(); +} + +bool Deletion::overlaps(const Deletion &other) const +{ + if (referenceName != other.referenceName) return false; + return ((start1-1 >= other.start1-1 && start1-1 <= other.end1) || + (other.start1-1 >= start1-1 && other.start1-1 <= end1)) && + ((start2-1 >= 
other.start2-1 && start2-1 <= other.end2) || + (other.start2-1 >= start2-1 && other.start2-1 <= end2)); +} + +bool Deletion::operator<(const Deletion &other) const +{ + if (referenceName != other.referenceName) return referenceName < other.referenceName; + if (start1 != other.start1) return start1 < other.start1; + if (start2 != other.start2) return start2 < other.start2; + if (end1 != other.end1) return end1 < other.end1; + return end2 < other.end2; +} + +bool Deletion::operator==(const Deletion &other) const +{ + return referenceName == other.referenceName && + start1 == other.start1 && start2 == other.start2 && + end1 == other.end1 && end2 == other.end2; +} + +std::ostream& operator <<(ostream &stream, const Deletion &del) +{ + stream << del.toBedpe(); + return stream; +} + +bool Deletion::checkRep() const +{ + return (start1 <= end1) && + (start2 <= end2) && + (length <= Helper::SVLEN_THRESHOLD); +} diff --git a/Deletion.h b/Deletion.h new file mode 100644 index 0000000..0c1504d --- /dev/null +++ b/Deletion.h @@ -0,0 +1,54 @@ +#ifndef _DELETION_H_ +#define _DELETION_H_ + +#include +#include + +class Deletion { +public: + Deletion(const std::string& referenceName, + int start1, + int end1, + int start2, + int end2, + int length, + const std::string& fromTag); + + virtual ~Deletion(); + + std::string getReferenceName() const { return referenceName; } + + int getStart1() const { return start1; } + + int getEnd1() const { return end1; } + + int getStart2() const { return start2; } + + int getEnd2() const { return end2; } + + int getLength() const { return length; } + + std::string getFromTag() const { return fromTag; } + + std::string toBedpe() const; + + friend std::ostream& operator <<(std::ostream& stream, const Deletion& del); + + bool overlaps(const Deletion &other) const; + bool operator<(const Deletion &other) const; + bool operator==(const Deletion &other) const; + +private: + std::string referenceName; + int start1; + int end1; + int start2; + int end2; + 
int length; + std::string fromTag; + + bool checkRep() const; + +}; + +#endif /* _DELETION_H_ */ diff --git a/FaidxWrapper.cpp b/FaidxWrapper.cpp new file mode 100644 index 0000000..c7167f8 --- /dev/null +++ b/FaidxWrapper.cpp @@ -0,0 +1,31 @@ +#include "FaidxWrapper.h" +#include "error.h" +#include + +using namespace std; + +FaidxWrapper::FaidxWrapper(const std::string &fasta) +{ + fai = fai_load(fasta.c_str()); + if (fai == NULL) error("Cannot load the indexed fasta."); +} + +FaidxWrapper::~FaidxWrapper() +{ + if (fai != NULL) fai_destroy(fai); +} + +int FaidxWrapper::size() +{ + return faidx_nseq(fai); +} + +string FaidxWrapper::fetch(const string &chrom, int start, int end) +{ + int len; + char *s = faidx_fetch_seq(fai, (char *)chrom.c_str(), start - 1, end - 1, &len); + if (s == NULL) error("cannot fetch the reference sequence"); + string str(s); + transform(str.begin(), str.end(), str.begin(), ::toupper); + return str; +} diff --git a/FaidxWrapper.h b/FaidxWrapper.h new file mode 100644 index 0000000..fbe2541 --- /dev/null +++ b/FaidxWrapper.h @@ -0,0 +1,19 @@ +#ifndef FAIDXWRAPPER_H +#define FAIDXWRAPPER_H + +#include "htslib/faidx.h" +#include + +class FaidxWrapper +{ +public: + FaidxWrapper(const std::string& fasta); + virtual ~FaidxWrapper(); + int size(); + std::string fetch(const std::string& chrom, int start, int end); + +private: + faidx_t *fai; +}; + +#endif // FAIDXWRAPPER_H diff --git a/Helper.cpp b/Helper.cpp new file mode 100644 index 0000000..1a1b6af --- /dev/null +++ b/Helper.cpp @@ -0,0 +1,56 @@ +#include "Helper.h" + +using namespace std; + +// Strip the leading directories and +// the last trailling suffix from a filename +string stripFilename(const string& filename) { + string out = stripDirectories(filename); + return stripExtension(out); +} + +// Remove a single file extension from the filename +string stripExtension(const string& filename) { + size_t suffixPos = filename.find_last_of('.'); + if(suffixPos == string::npos) + return 
filename; // no suffix + else + return filename.substr(0, suffixPos); +} + +// Strip the leading directories from a filename +string stripDirectories(const string& filename) { + size_t lastDirPos = filename.find_last_of('/'); + + if(lastDirPos == string::npos) + return filename; // no directories + else + return filename.substr(lastDirPos + 1); +} + + + +std::string Helper::getReferenceName(BamTools::BamReader &reader, int referenceId) { + assert(referenceId >= 0 && referenceId < reader.GetReferenceCount()); + return reader.GetReferenceData()[referenceId].RefName; +} + + +int numOfTheLongestPrefix(const string &s1, const string &s2) +{ + assert(s1.size() == s2.size()); + for (int i = 0; i < s1.size(); i++) { + if (s1[i] != s2[i]) return i; + } + return s1.size(); // no mismatch: the common prefix is the whole string +} + + +int numOfThelongestSuffix(const string &s1, const string &s2) +{ + assert(s1.size() == s2.size()); + for (int i = 0; i < s1.size(); i++) { + if (s1[s1.size() - 1 - i] != s2[s1.size() - 1 - i]) return i; + } + return s1.size(); // no mismatch: the common suffix is the whole string +} diff --git a/Helper.h b/Helper.h new file mode 100644 index 0000000..f4a488b --- /dev/null +++ b/Helper.h @@ -0,0 +1,65 @@ +#ifndef HELPER_H +#define HELPER_H + +#include "api/BamReader.h" +#include +#include + +// +// Functions +// +std::string stripFilename(const std::string& filename); +std::string stripExtension(const std::string& filename); +std::string stripDirectories(const std::string& filename); +int numOfTheLongestPrefix(const std::string& s1, const std::string& s2); +int numOfThelongestSuffix(const std::string& s1, const std::string& s2); + +int extend(const std::string& read, int offset, int leftOrigin, int rightOrigin); + +template +void cluster(const std::vector& orig, std::vector >& clusters, Compare comp) { + std::vector buffer; + + auto first = orig.begin(); + auto last = orig.end(); + buffer.push_back(*first); + while (++first != last) { + if (!comp(*first, buffer[0])) { + clusters.push_back(buffer); + buffer.clear(); + } + buffer.push_back(*first); + } + if 
(!buffer.empty()) clusters.push_back(buffer); +} + +template +void merge(const std::vector& orig, std::vector& results, Compare comp) { + std::vector removed(orig.size(), false); + + for (size_t i = 0; i + 1 < orig.size(); ++i) { // "i + 1 <" form: size() - 1 wraps around when orig is empty + for (size_t j = i + 1; j < orig.size(); ++j) { + if (!removed[j] && comp(orig[i], orig[j])) { + removed[j] = true; + } + } + } + + for (size_t i = 0; i < removed.size(); ++i) { + if (!removed[i]) { + results.push_back(orig[i]); + } + } + +} + +namespace Helper { +std::string getReferenceName(BamTools::BamReader& reader, int referenceId); +const int SVLEN_THRESHOLD = -50; +const int CONFLICT_THRESHOLD = 13; + +//std::set forwardEClipNames; +//std::set reverseBClipNames; +} + +#endif // HELPER_H diff --git a/README.md b/README.md new file mode 100644 index 0000000..a187ef8 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +#*Sprites* + +**Written by** Zhen Zhang (zhangz@csu.edu.cn) +[Jianxin Wang Lab, Central South University](http://netlab.csu.edu.cn/) + +**Please cite:** + +--- + +**Current version:** 0.3.0 + +Support for Linux and OS X + +##Summary +*Sprites* is a sv caller that specializes in detecting deletion from low-coverage sequencing data. It works by identifying split reads from alignments based on soft-clipping information. By re-aligning a split read to one of its target sequences derived from paired-end reads that span it, a deletion is predicted and breakpoint ends are pinpointed with base-pair resolution. *Sprites* uses alignments produced by BWA. Of course, it can also use those produced by other read aligners that support 5'- or 3'-end soft-clipping, like Bowtie2. It can also be extended to detect other types of sv. 
+ +##Pre-built binaries +You can download the pre-built binaries from the [Releases page](https://github.com/zhangzhen/sprites/releases) or the links below: +- Linux 64bit: [sprites\_Linux64](https://github.com/zhangzhen/sprites/releases/download/v0.3.0/sprites\_Linux64) +- OS X: [sprites\_OSX](https://github.com/zhangzhen/sprites/releases/download/v0.3.0/sprites\_OSX) + +##Installation + +#### Requirements +- HTSlib ([http://www.htslib.org/](http://www.htslib.org/)) +- BamTools ([https://github.com/pezmaster31/bamtools](https://github.com/pezmaster31/bamtools)) +- CMake ([http://www.cmake.org](http://www.cmake.org)) + +#### Building Sprites +``` +git clone https://github.com/zhangzhen/sprites.git +cd sprites +export BAMTOOLS_HOME=/path/to/bamtools +export HTSLIB_HOME=/path/to/htslib +mkdir build +cd build +cmake .. +make +cp sprites /usr/local/bin/ +``` +##Usage +``` +sprites [options] sample.bam +``` +The input bam file is required to be sorted. + +**Options** +``` +-r FILE +``` diff --git a/SoftClipReader.cpp b/SoftClipReader.cpp new file mode 100644 index 0000000..a2f6114 --- /dev/null +++ b/SoftClipReader.cpp @@ -0,0 +1,105 @@ +#include "SoftClipReader.h" +#include "error.h" + +#include + +using namespace std; +using namespace BamTools; + +SoftClipReader::SoftClipReader(const string &filename, int minClip, int mode) : + minClip(minClip), mode(mode) { + if (!reader.Open(filename)) + error("Could not open the input BAM file."); + if (!reader.LocateIndex()) + error("Could not locate the index file"); +} + +SoftClipReader::~SoftClipReader() { + reader.Close(); +} + +int SoftClipReader::getReferenceId(const string &referenceName) { + return reader.GetReferenceID(referenceName); +} + +bool SoftClipReader::getSoftClip(SoftClip &clip) { + BamAlignment al; + while (reader.GetNextAlignment(al)) { + vector clipSizes, readPositions, genomePositions; + if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + int size = clipSizes.size(); + + if 
(inEnhancedMode()) { + if (!al.IsReverseStrand() && al.IsMateReverseStrand() && al.Position < al.MatePosition && + al.Position != genomePositions[size - 1] && clipSizes[size - 1] > minClip && + (size == 1 || (size == 2 && clipSizes[0] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - ((size == 2) ? clipSizes[0] : 0), + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[size - 1], + al.QueryBases); + return true; + } + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.Position > al.MatePosition && + al.Position == genomePositions[0] && clipSizes[0] > minClip && + (size == 1 || (size == 2 && clipSizes[1] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - clipSizes[0], + genomePositions[0] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[0], + al.QueryBases); + return true; + } + } else if (al.IsProperPair()) { + if (!al.IsReverseStrand() && al.Position == genomePositions[0] && + clipSizes[0] > minClip && + (size == 1 || + (size == 2 && clipSizes[1] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position - clipSizes[0] + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[0], + al.QueryBases); + return true; + } + if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] && + clipSizes[size - 1] > minClip && + (size == 1 || + (size == 2 && clipSizes[0] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - ((size == 2) ? 
clipSizes[0] : 0), + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[size - 1], + al.QueryBases); + return true; + } + } + + } + + return false; +} + +bool SoftClipReader::setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition) { + return reader.SetRegion(leftRefId, leftPosition, rightRefId, rightPosition); +} + +bool SoftClipReader::inEnhancedMode() const { + return mode == 1; +} diff --git a/SoftClipReader.h b/SoftClipReader.h new file mode 100644 index 0000000..a1b9317 --- /dev/null +++ b/SoftClipReader.h @@ -0,0 +1,31 @@ +#ifndef SOFTCLIPREADER_H +#define SOFTCLIPREADER_H + +#include "SoftClip.h" +#include "api/BamReader.h" + +#include + +class SoftClipReader +{ +public: + // 0 indicates the standard mode and 1 indicates the enhanced mode, which reads reads of type 2 besides type 1 + SoftClipReader(const std::string& filename, int minClip, int mode); + virtual ~SoftClipReader(); + + int getReferenceId(const std::string& referenceName); + + bool getSoftClip(SoftClip& clip); + bool setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition); + + int getMinClip() const; + +private: + BamTools::BamReader reader; + int minClip; + int mode; + + bool inEnhancedMode() const; +}; + +#endif // SOFTCLIPREADER_H diff --git a/Thirdparty/Timer.h b/Thirdparty/Timer.h new file mode 100644 index 0000000..7179309 --- /dev/null +++ b/Thirdparty/Timer.h @@ -0,0 +1,56 @@ +//----------------------------------------------- +// Copyright 2009 Wellcome Trust Sanger Institute +// Written by Jared Simpson (js18@sanger.ac.uk) +// Released under the GPL license +//----------------------------------------------- +// +// Timer - Simple object to that prints the wallclock +// length of its lifetime +// +#ifndef TIMER_H +#define TIMER_H + +#include +#include + +class Timer +{ + public: + Timer(std::string s, bool silent = false) : m_desc(s), m_silent(silent) + { + reset(); + } + + 
~Timer() + { + if(!m_silent) + fprintf(stderr, "[timer - %s] wall clock: %.2lfs CPU: %.2lfs\n", m_desc.c_str(), getElapsedWallTime(), getElapsedCPUTime()); + } + + double getElapsedWallTime() const + { + timeval now; + gettimeofday(&now, NULL); + return (now.tv_sec - m_wallStart.tv_sec) + (double(now.tv_usec - m_wallStart.tv_usec) / 1000000); + } + + double getElapsedCPUTime() const + { + double now = clock(); + return (now - m_cpuStart) / CLOCKS_PER_SEC; + } + + void reset() { gettimeofday(&m_wallStart, NULL); m_cpuStart = clock(); } + + private: + std::string m_desc; + + // Track the wall-clock and CPU times + // CPU time includes all threads + timeval m_wallStart; + double m_cpuStart; + + bool m_silent; +}; + +#endif diff --git a/Thirdparty/overlapper.cpp b/Thirdparty/overlapper.cpp new file mode 100644 index 0000000..e0de542 --- /dev/null +++ b/Thirdparty/overlapper.cpp @@ -0,0 +1,1417 @@ +//------------------------------------------------------------------------------- +// +// overlapper - String-string overlap algorithm +// +// Copyright (C) 2011 Jared Simpson (jared.simpson@gmail.com) +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +#include "overlapper.h" +#include "../error.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +OverlapperParams default_params = { 2, -6, -3 }; +OverlapperParams ungapped_params = { 2, -10000, -3 }; + + + + +// +#define max3(x,y,z) std::max(std::max(x,y), z) +//#define DEBUG_OVERLAPPER 1 +//#define DEBUG_EXTEND 1 + + +// +SequenceInterval::SequenceInterval() : start(0), end(-1) +{ + +} + +SequenceOverlap::SequenceOverlap() +{ + length[0] = length[1] = 0; + score = -1; + edit_distance = -1; + total_columns = -1; +} + +// +bool SequenceOverlap::isValid() const +{ + return !cigar.empty() && match[0].isValid() && match[1].isValid(); +} + +bool SequenceOverlap::isQualified(int minOverlap, double minIdentity) const +{ + if (getOverlapLength() >= minOverlap && + getPercentIdentity() >= minIdentity * 100) { + return true; + } + return false; +} + +// +double SequenceOverlap::getPercentIdentity() const +{ + return (double)(total_columns - edit_distance) * 100.0f / total_columns; +} + +// +std::ostream& operator<<(std::ostream& out, const SequenceOverlap& overlap) +{ + out << "[" << overlap.match[0].start << " " << overlap.match[0].end << "] "; + out << "[" << overlap.match[1].start << " " << overlap.match[1].end << "] "; + out << "C:" << overlap.cigar; + return out; +} + +void SequenceOverlap::makePaddedMatches(const std::string& s1, const std::string& s2, + std::string* p1, std::string* p2) const +{ + assert(isValid() && p1 != NULL && p2 != NULL); + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = 
match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + p1->append(s1.substr(current_1, length)); + p2->append(s2.substr(current_2, length)); + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + p1->append(s1.substr(current_1, length)); + p2->append(length, '-'); + current_1 += length; + } + else if(code == 'I') { + p1->append(length, '-'); + p2->append(s2.substr(current_2, length)); + current_2 += length; + } + length = -1; + } +} + +// +int SequenceOverlap::calculateEditDistance(const std::string& s1, const std::string& s2) const +{ + // Recalculate the edit distance between the pair of strings, given this alignment + int new_edit_distance = 0; + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + for(int i = 0; i < length; ++i) { + if(s1[current_1 + i] != s2[current_2 + i]) + new_edit_distance++; + } + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + new_edit_distance += length; + current_1 += length; + } + else if(code == 'I') { + new_edit_distance += length; + current_2 += length; + } + length = -1; + } + + return new_edit_distance; +} + +// +int SequenceOverlap::calculateTotalColumns() const +{ + // Recalculate the edit distance between the pair of strings, given this alignment + int total_columns = 0; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + total_columns += length; + } + + return total_columns; +} + +// +void SequenceOverlap::printAlignment(const std::string& s1, const std::string& s2) const +{ + assert(isValid()); + + std::string out_1; + std::string 
out_2; + + // Print out the initial part of the strings, which do not match. + // Typically this is the overhanging portion of one of the strings. + std::string leader_1 = s1.substr(0, match[0].start); + std::string leader_2 = s2.substr(0, match[1].start); + + // Pad the beginning of the output strings with spaces to align + if(leader_1.size() < leader_2.size()) + out_1.append(leader_2.size() - leader_1.size(), ' '); + + if(leader_2.size() < leader_1.size()) + out_2.append(leader_1.size() - leader_2.size(), ' '); + + out_1.append(leader_1); + out_2.append(leader_2); + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + out_1.append(s1.substr(current_1, length)); + out_2.append(s2.substr(current_2, length)); + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + out_1.append(s1.substr(current_1, length)); + out_2.append(length, '-'); + current_1 += length; + } + else if(code == 'I') { + out_1.append(length, '-'); + out_2.append(s2.substr(current_2, length)); + current_2 += length; + } + length = -1; + } + + // Append the remainder of each string + out_1.append(s1.substr(current_1)); + out_2.append(s2.substr(current_2)); + + // Print the output strings and split long lines + int MAX_COLUMNS = 120; + size_t total_columns = std::max(out_1.size(), out_2.size()); + for(size_t i = 0; i < total_columns; i += MAX_COLUMNS) { + std::string sub_1; + std::string sub_2; + if(i < out_1.size()) + sub_1 = out_1.substr(i, MAX_COLUMNS); + if(i < out_2.size()) + sub_2 = out_2.substr(i, MAX_COLUMNS); + + std::cout << "S1\t" << sub_1 << "\n"; + std::cout << "S2\t" << sub_2 << "\n"; + std::cout << "\n"; + } + std::cout << "Cigar: " << cigar << "\n"; + std::cout << "Score: " << score << "\n"; + + printf("Identity: %2.2lf\n", 
getPercentIdentity()); +} + +typedef std::vector DPCells; +typedef std::vector DPMatrix; + +// +SequenceOverlap Overlapper::computeOverlap(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlap error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + score_matrix[i][j] = max3(diagonal, up, left); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. 
We start the backtracking from + // that cell + int max_row_value = std::numeric_limits::min(); + int max_column_value = std::numeric_limits::min(); + size_t max_row_index = 0; + size_t max_column_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(size_t i = 1; i < num_columns; ++i) { + int v = score_matrix[i][num_rows - 1]; + if(score_matrix[i][num_rows - 1] > max_row_value) { + max_row_value = v; + max_row_index = i; + } + } + + // Check every row of the last column + for(size_t j = 1; j < num_rows; ++j) { + int v = score_matrix[num_columns - 1][j]; + if(v > max_column_value) { + max_column_value = v; + max_column_index = j; + } + } + + // Compute the location at which to start the backtrack + size_t i; + size_t j; + + if(max_column_value > max_row_value) { + i = num_columns - 1; + j = max_column_index; + output.score = max_column_value; + } + else { + i = max_row_index; + j = num_rows - 1; + output.score = max_row_value; + } + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +SequenceOverlap Overlapper::computeOverlapSG(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlap error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + for(size_t j = 0; j < num_rows; ++j) { + score_matrix[0][j] = j*params.gap_penalty; + } + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = 
j - 1; + int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; + int left = score_matrix[i-1][j] + gap_pen; + + score_matrix[i][j] = max3(diagonal, up, left); + } + } + + int max_row_value; + size_t max_row_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(size_t i = num_columns - 1; i > 0; --i) { + int left = score_matrix[i-1][num_rows - 1]; + if (score_matrix[i][num_rows - 1] != left) { + max_row_index = i; + max_row_value = score_matrix[i][num_rows - 1]; + break; + } + } + + // Compute the location at which to start the backtrack + size_t i = max_row_index; + size_t j = num_rows - 1; + output.score = max_row_value; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(j > 0 && i > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int gap_pen = (j == num_rows - 1) ? 
0 : params.gap_penalty; + int left = score_matrix[i-1][j] + gap_pen; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +SequenceOverlap Overlapper::alignSuffix(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlapSW error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + score_matrix[i][j] = std::max(0, max3(diagonal, up, left)); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. We start the backtracking from + // that cell + int max_value = std::numeric_limits::min(); + size_t max_row_index = num_rows - 1; + size_t max_column_index = 0; + + for (size_t i =1; i < num_columns; ++i) { + if (score_matrix[i][max_row_index] > max_value) { + max_value = score_matrix[i][max_row_index]; + max_column_index = i; + } + } + + // Compute the location at which to start the backtrack + size_t i = max_column_index; + size_t j = max_row_index; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0 && score_matrix[i][j] > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +SequenceOverlap Overlapper::computeOverlapSW2(const std::string& s1, const std::string& s2, int minOverlap, double minIdentity, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlapSW error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + int diagonal = score_matrix[i-1][j-1] + 
(s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; +// int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; +// int left = score_matrix[i-1][j] + gap_pen; + int left = score_matrix[i-1][j] + params.gap_penalty; + + score_matrix[i][j] = std::max(0, max3(diagonal, up, left)); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. We start the backtracking from + // that cell + + std::vector last_row_indexes(num_columns - 1); + for (size_t i = 1; i < num_columns; ++i) { + last_row_indexes[i-1] = i; + } + std::sort(last_row_indexes.begin(), last_row_indexes.end(), + [&score_matrix, num_rows](size_t i1, size_t i2) {return score_matrix[i1][num_rows - 1] > score_matrix[i2][num_rows - 1];}); + + int cnt = 0; + for (auto max_row_index: last_row_indexes) { + if (cnt >= 10) break; + auto max_row_value = score_matrix[max_row_index][num_rows - 1]; + + // Compute the location at which to start the backtrack + size_t i = max_row_index; + size_t j = num_rows - 1; + output.score = max_row_value; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); + #ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); + #endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0 && score_matrix[i][j] > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; +// int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; +// int left = score_matrix[i-1][j] + gap_pen; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + +// std::string s0 = "CTGCCCCAAATACAGCTACTGCCACCACCAAGGCGGCTGTTGGTGCCCTGCAGTCAACAGCCAGTCTCTTCGTGGTCTCACTCTCTCTTCTACATCTCTCC"; +// std::reverse(s0.begin(), s0.end()); + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + if (cigar.empty()) continue; + std::reverse(cigar.begin(), cigar.end()); + output.cigar = compactCigar(cigar); + +// if (s2 == s0) +// output.printAlignment(s1, s2); + + if (output.isQualified(minOverlap, minIdentity)) + return output; + + cnt++; + } + error("No overlap was found."); +} + +// Returns the index into a cell vector for for the ith column and jth row +// of a dynamic programming matrix. The band_origin gives the row in first +// column of the matrix that the bands start at. This is used to calculate +// the starting band row for each column. 
+inline int _getBandedCellIndex(int i, int j, int band_width, int band_origin_row) +{ + int band_start = band_origin_row + i; + int band_row_index = j - band_start; + return (band_row_index >= 0 && band_row_index < band_width) ? i * band_width + band_row_index : -1; +} + +// Returns the score for (i,j) in the +inline int _getBandedCellScore(const DPCells& cells, int i, int j, int band_width, int band_origin_row, int invalid_score) +{ + int band_start = band_origin_row + i; + int band_row_index = j - band_start; + return (band_row_index >= 0 && band_row_index < band_width) ? cells[i * band_width + band_row_index] : invalid_score; +} + +SequenceOverlap Overlapper::extendMatch(const std::string& s1, const std::string& s2, + int start_1, int start_2, int band_width) +{ + SequenceOverlap output; + int num_columns = s1.size() + 1; + int num_rows = s2.size() + 1; + + const int MATCH_SCORE = 2; + const int GAP_PENALTY = -5; + const int MISMATCH_PENALTY = -3; + + // Calculate the number of cells off the diagonal to compute + int half_width = band_width / 2; + band_width = half_width * 2 + 1; // the total number of cells per band + + // Calculate the number of columns that we need to extend to for s1 + size_t num_cells_required = num_columns * band_width; + + // Allocate bands with uninitialized scores + int INVALID_SCORE = std::numeric_limits::min(); + DPCells cells(num_cells_required, 0); + + // Calculate the band center coordinates in the first + // column of the multiple alignment. These are calculated by + // projecting the match diagonal onto the first column. It is possible + // that these are negative. 
+ int band_center = start_2 - start_1 + 1; + int band_origin = band_center - (half_width + 1); +#ifdef DEBUG_EXTEND + printf("Match start: [%d %d]\n", start_1, start_2); + printf("Band center, origin: [%d %d]\n", band_center, band_origin); + printf("Num cells: %zu\n", cells.size()); +#endif + + // Fill in the bands column by column + for(int i = 1; i < num_columns; ++i) { + int j = band_origin + i; // start row of this band + int end_row = j + band_width; + + // Trim band coordinates to only compute valid positions + if(j < 1) + j = 1; + if(end_row > num_rows) + end_row = num_rows; + + if(end_row <= 0 || j >= num_rows || j >= end_row) + continue; // nothing to do for this column + +#ifdef DEBUG_EXTEND + printf("Filling column %d rows [%d %d]\n", i, j, end_row); +#endif + + // Fill in this band. To avoid the need to perform many tests whether a particular cell + // is stored in a band, we do some of the calculations outside of the main loop below. + // We first calculate the score for the first cell in the band. This calculation cannot + // involve the cell above the first row so we ignore it below. We then fill in the main + // part of the band, which can perform valid reads from all its neighboring cells. Finally + // we calculate the last row, which does not use the cell to its left. + + // Set up initial indices and scores + int curr_idx = _getBandedCellIndex(i, j, band_width, band_origin); + int left_idx = _getBandedCellIndex(i - 1, j, band_width, band_origin); + int diagonal_idx = _getBandedCellIndex(i - 1, j - 1, band_width, band_origin); + int diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + int left_score = left_idx != -1 ? 
cells[left_idx] + GAP_PENALTY : INVALID_SCORE; + int up_score = 0; + + // Set the first row score + cells[curr_idx] = std::max(left_score, diagonal_score); + +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); + assert(diagonal_idx != -1); +#endif + + // Update indices + curr_idx += 1; + left_idx += 1; + diagonal_idx += 1; + j += 1; + + // Fill in the main part of the band, stopping before the last row + while(j < end_row - 1) { + +#ifdef DEBUG_EXTEND + assert(diagonal_idx == _getBandedCellIndex(i - 1, j - 1, band_width, band_origin)); + assert(left_idx == _getBandedCellIndex(i - 1, j, band_width, band_origin)); + assert(curr_idx - 1 == _getBandedCellIndex(i, j - 1, band_width, band_origin)); +#endif + + diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + left_score = cells[left_idx] + GAP_PENALTY; + up_score = cells[curr_idx - 1] + GAP_PENALTY; + cells[curr_idx] = max3(diagonal_score, left_score, up_score); + +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); +#endif + // Update indices + curr_idx += 1; + left_idx += 1; + diagonal_idx += 1; + j += 1; + } + + // Fill in last row, here we ignore the left cell which is now out of band + if(j != end_row) { + diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + up_score = cells[curr_idx - 1] + GAP_PENALTY; + cells[curr_idx] = std::max(diagonal_score, up_score); +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); +#endif + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. 
We start the backtracking from + // that cell + int max_row_value = std::numeric_limits::min(); + int max_column_value = std::numeric_limits::min(); + size_t max_row_index = 0; + size_t max_column_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(int i = 1; i < num_columns; ++i) { + int v = _getBandedCellScore(cells, i, num_rows - 1, band_width, band_origin, INVALID_SCORE); + if(v > max_row_value) { + max_row_value = v; + max_row_index = i; + } + } + + // Check every row of the last column + for(int j = 1; j < num_rows; ++j) { + int v = _getBandedCellScore(cells, num_columns - 1, j, band_width, band_origin, INVALID_SCORE); + if(v > max_column_value) { + max_column_value = v; + max_column_index = j; + } + } + + // Compute the location at which to start the backtrack + size_t i; + size_t j; + + if(max_column_value > max_row_value) { + i = num_columns - 1; + j = max_column_index; + output.score = max_column_value; + } + else { + i = max_row_index; + j = num_rows - 1; + output.score = max_row_value; + } + +#ifdef DEBUG_EXTEND + printf("BEST: %zu %zu\n", i, j); +#endif + + // Backtrack to fill in the cigar string and alignment start position + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_EXTEND + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = _getBandedCellScore(cells, i - 1, j - 1, band_width, band_origin, INVALID_SCORE) + (is_match ? 
MATCH_SCORE : MISMATCH_PENALTY); + int up = _getBandedCellScore(cells, i, j - 1, band_width, band_origin, INVALID_SCORE) + GAP_PENALTY; + int left = _getBandedCellScore(cells, i -1 , j, band_width, band_origin, INVALID_SCORE) + GAP_PENALTY; + int curr = _getBandedCellScore(cells, i, j, band_width, band_origin, INVALID_SCORE); + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(curr == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(curr == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(curr == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +// The score for this cell coming from a match, deletion and insertion +struct AffineCell +{ + AffineCell() : G(0), I(-std::numeric_limits::max()), D(-std::numeric_limits::max()) {} + + // + int G; + int I; + int D; +}; + +typedef std::vector AffineCells; +typedef std::vector AffineMatrix; + +SequenceOverlap Overlapper::computeOverlapAffine(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlap error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; 
+ + int gap_open = 5; + int gap_ext = 2; + + AffineMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + + int diagonal = score_matrix[i-1][j-1].G + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + + // When computing the score starting from the left/right cells, we have to determine + // whether to extend an existing gap or start a new one. + AffineCell& curr = score_matrix[i][j]; + + AffineCell& up = score_matrix[i][j-1]; + if(up.I > up.G - gap_open) + curr.I = up.I - gap_ext; + else + curr.I = up.G - (gap_open + gap_ext); + + AffineCell& left = score_matrix[i-1][j]; + if(left.D > left.G - gap_open) + curr.D = left.D - gap_ext; + else + curr.D = left.G - (gap_open + gap_ext); + + curr.G = max3(curr.D, curr.I, diagonal); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. 
We start the backtracking from + // that cell + int max_row_value = std::numeric_limits::min(); + int max_column_value = std::numeric_limits::min(); + size_t max_row_index = 0; + size_t max_column_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(size_t i = 1; i < num_columns; ++i) { + int v = score_matrix[i][num_rows - 1].G; + if(v > max_row_value) { + max_row_value = v; + max_row_index = i; + } + } + + // Check every row of the last column + for(size_t j = 1; j < num_rows; ++j) { + int v = score_matrix[num_columns - 1][j].G; + if(v > max_column_value) { + max_column_value = v; + max_column_index = j; + } + } + + // Compute the location at which to start the backtrack + size_t i; + size_t j; + + if(max_column_value > max_row_value) { + i = num_columns - 1; + j = max_column_index; + output.score = max_column_value; + } else { + i = max_row_index; + j = num_rows - 1; + output.score = max_row_value; + } + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1].G + (is_match ? 
params.match_score : params.mismatch_penalty); + int up1 = score_matrix[i][j-1].G - (gap_open + gap_ext); + int up2 = score_matrix[i][j-1].I - gap_ext; + + int left1 = score_matrix[i-1][j].G - (gap_open + gap_ext); + int left2 = score_matrix[i-1][j].D - gap_ext; + + int curr = score_matrix[i][j].G; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(curr == up1 || curr == up2) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(curr == left1 || curr == left2) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(curr == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +// Compact an expanded CIGAR string into a regular cigar string +std::string Overlapper::compactCigar(const std::string& ecigar) +{ + if(ecigar.empty()) + return ""; + + std::stringstream compact_cigar; + char curr_symbol = ecigar[0]; + int curr_run = 1; + for(size_t i = 1; i < ecigar.size(); ++i) { + if(ecigar[i] == curr_symbol) { + curr_run += 1; + } else { + compact_cigar << curr_run << curr_symbol; + curr_symbol = ecigar[i]; + curr_run = 1; + } + } + + // Add last symbol/run + compact_cigar << curr_run << curr_symbol; + return compact_cigar.str(); +} + + +SequenceOverlap Overlapper::ageAlignPrefix(const std::string &s1, const std::string &s2, const ScoreParam &score_param) +{ + SequenceOverlap output; + + const int NONE = 0; + const int 
DIAGONAL = 1; + const int VERTICAL = 2; + const int HORIZONTAL = 3; + + int orientation_table[] = {NONE, DIAGONAL, VERTICAL, HORIZONTAL}; + int orientation_table_m[] = {NONE, HORIZONTAL, VERTICAL}; + + output.length[0] = s1.size(); + output.length[1] = s2.size(); + + size_t num_columns = s1.size() + 2; + size_t num_rows = s2.size() + 2; + + DPMatrix S(num_rows, DPCells(num_columns)); + DPMatrix S_backtrace(num_rows, DPCells(num_columns)); + DPMatrix S_lower(num_rows, DPCells(num_columns)); + DPMatrix S_upper(num_rows, DPCells(num_columns)); + + // calculate score matrix + for (size_t i = 1; i < num_rows-1; ++i) { + for (size_t j = 1; j < num_columns-1; ++j) { + S_lower[i][j] = std::max(S_lower[i-1][j] - score_param.gap, S[i-1][j] - score_param.gap_start); + S_upper[i][j] = std::max(S_upper[i][j-1] - score_param.gap, S[i][j-1] - score_param.gap_start); + int middle_scores[] = {0, S[i-1][j-1] + score_param.matchChar(s1[j-1], s2[i-1]), S_lower[i][j], S_upper[i][j]}; + const int N = sizeof(middle_scores) / sizeof(int); + auto max_it = std::max_element(middle_scores, middle_scores + N); + S[i][j] = *max_it; + S_backtrace[i][j] = orientation_table[std::distance(middle_scores, max_it)]; + } + } + + DPMatrix R(num_rows, DPCells(num_columns)); + DPMatrix R_backtrace(num_rows, DPCells(num_columns)); + DPMatrix R_lower(num_rows, DPCells(num_columns)); + DPMatrix R_upper(num_rows, DPCells(num_columns)); + + for (size_t i = num_rows-2; i > 0; --i) { + for (size_t j = num_columns-2; j > 0; --j) { + R_lower[i][j] = std::max(R_lower[i+1][j] - score_param.gap, R[i+1][j] - score_param.gap_start); + R_upper[i][j] = std::max(R_upper[i][j+1] - score_param.gap, R[i][j+1] - score_param.gap_start); + int middle_scores[] = {0, R[i+1][j+1] + score_param.matchChar(s1[j-1], s2[i-1]), R_lower[i][j], R_upper[i][j]}; + const int N = sizeof(middle_scores) / sizeof(int); + auto max_it = std::max_element(middle_scores, middle_scores + N); + R[i][j] = *max_it; + R_backtrace[i][j] = 
orientation_table[std::distance(middle_scores, max_it)]; + } + } + + + DPMatrix M_backtrace(num_rows, DPCells(num_columns)); + + for (size_t i = 1; i < num_rows-1; ++i) { + M_backtrace[i][0] = VERTICAL; + } + + for (size_t j = 1; j < num_columns-1; ++j) { + M_backtrace[0][j] = HORIZONTAL; + } + + // calculate maximum matrix + for (size_t i = 1; i < num_rows-1; ++i) { + for (size_t j = 1; j < num_columns-1; ++j) { + int scores[] = {S[i][j], S[i][j-1], S[i-1][j]}; + const int N = sizeof(scores) / sizeof(int); + auto max_it = std::max_element(scores, scores + N); + S[i][j] = *max_it; + M_backtrace[i][j] = orientation_table_m[std::distance(scores, max_it)]; + } + } + + DPMatrix MR_backtrace(num_rows, DPCells(num_columns)); + + for (size_t i = num_rows-2; i > 0; --i) { + MR_backtrace[i][num_columns-1] = VERTICAL; + } + + for (size_t j = num_columns-2; j > 0; --j) { + MR_backtrace[num_rows-1][j] = HORIZONTAL; + } + + // calculate maximum matrix + for (size_t i = num_rows-2; i > 0; --i) { + for (size_t j = num_columns-2; j > 0; --j) { + int scores[] = {R[i][j], R[i][j+1], R[i+1][j]}; + const int N = sizeof(scores) / sizeof(int); + auto max_it = std::max_element(scores, scores + N); + R[i][j] = *max_it; + MR_backtrace[i][j] = orientation_table_m[std::distance(scores, max_it)]; + } + } + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << S[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << M_backtrace[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << R[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << 
MR_backtrace[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + + int max_score = 0; + int max_row_index = 0; + int max_column_index = 0; + + for (size_t i = 0; i < num_rows-1; ++i) { + for (size_t j = 0; j < num_columns-1; ++j) { + int val = S[i][j] + R[i+1][j+1]; + if (max_score < val) max_score = val; + } + } + + for (size_t i = 0; i < num_rows-1; ++i) { + for (size_t j = 0; j < num_columns-1; ++j) { + if (M_backtrace[i][j] == NONE && S[i][j] + R[i+1][j+1] == max_score) { + max_row_index = i; + max_column_index = j; + goto theEnd; + } + } + } + +theEnd: + output.score = max_score; + output.match[0].end = max_column_index - 1; + output.match[1].end = max_row_index - 1; + +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + int i = max_row_index; + int j = max_column_index; + std::string cigar; + + while (S_backtrace[i][j] != NONE && i*j !=0) { + if (S_backtrace[i][j] == VERTICAL) { + cigar.push_back('I'); + output.edit_distance += 1; + i--; + } else if(S_backtrace[i][j] == HORIZONTAL) { + cigar.push_back('D'); + output.edit_distance += 1; + j--; + } else { + if (s1[j-1] != s2[i-1]) { + output.edit_distance += 1; + } + cigar.push_back('M'); + i--; + j--; + } + output.total_columns += 1; + } + + output.match[0].start = j; + output.match[1].start = i; + + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + + return output; +} + + +SequenceOverlap Overlapper::ageAlignSuffix(const std::string &s1, const std::string &s2, const ScoreParam &score_param) +{ + std::string s1_r = s1; + std::reverse(s1_r.begin(), s1_r.end()); + std::string s2_r = s2; + std::reverse(s2_r.begin(), s2_r.end()); + + SequenceOverlap output = ageAlignPrefix(s1_r, s2_r, score_param); + + output.match[0].flipStrand(output.length[0]); + 
output.match[1].flipStrand(output.length[1]); + + return output; +} + + +SequenceOverlap Overlapper::alignPrefix(const std::string &s1, const std::string &s2, const OverlapperParams params) +{ + std::string s1_r = s1; + std::reverse(s1_r.begin(), s1_r.end()); + std::string s2_r = s2; + std::reverse(s2_r.begin(), s2_r.end()); + + SequenceOverlap output = alignSuffix(s1_r, s2_r, params); + + output.match[0].flipStrand(output.length[0]); + output.match[1].flipStrand(output.length[1]); + + return output; +} diff --git a/Thirdparty/overlapper.h b/Thirdparty/overlapper.h new file mode 100644 index 0000000..1036541 --- /dev/null +++ b/Thirdparty/overlapper.h @@ -0,0 +1,196 @@ +//------------------------------------------------------------------------------- +// +// overlapper - Functions to calculate overlaps between pairs of strings +// +// Copyright (C) 2011 Jared Simpson (jared.simpson@gmail.com) +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// ------------------------------------------------------------------------------ +#ifndef OVERLAPPER_H +#define OVERLAPPER_H + +#include +#include +#include + +// A start/end coordinate pair representing +// a subsequence. The end coordinate is +// the index of the last base aligned. +struct SequenceInterval +{ + // functions + SequenceInterval(); + + // Check that the interval is valid + bool isValid() const { return start <= end; } + + // Change the interval to represent the same + // set of bases but on the opposite strand. + void flipStrand(int sequence_length) + { + assert(isValid()); + int tmp = sequence_length - start - 1; + start = sequence_length - end - 1; + end = tmp; + assert(isValid()); + } + + // Returns the length of the interval + // The interval must be valid + int length() const + { + assert(isValid()); + return end - start + 1; + } + + // data + int start; + int end; // inclusive +}; + +// Data structure to hold the result of +// an overlap calculation +struct SequenceOverlap +{ + // Functions + SequenceOverlap(); + + // Check that the record is properly formed + bool isValid() const; + + // added by Zhen Zhang + bool isQualified(int minOverlap, double minIdentity) const; + + // Return padded versions of the matching portions of the strings + void makePaddedMatches(const std::string& s1, const std::string& s2, + std::string* p1, std::string* p2) const; + + // Print the alignment with padding characters + void printAlignment(const std::string& s1, const std::string& s2) const; + + // Recalculate the edit distance between the strings using this alignment + int calculateEditDistance(const std::string& s1, const std::string& s2) const; + + // Recalculate the number of columns in the alignment + int calculateTotalColumns() const; + + // Return the percent identity which we define to be + // the number of matching columns divided by the total number of columns + double getPercentIdentity() const; + + // Returns the length of the overlap, defined to 
be the + // number of columns in the alignment + int getOverlapLength() const { return total_columns; } + + // + friend std::ostream& operator<<(std::ostream& out, const SequenceOverlap& overlap); + + // Data + + // The coordinates of the matching portion of each string + // The end coordinate are the index of the last base matched + SequenceInterval match[2]; + + // The length of the input sequences + int length[2]; + + // + int score; + int edit_distance; + int total_columns; + + // The cigar string follows the sam convention with s1 being the "reference": + // I is an insertion into s1 + // D is a deletion from s1 + + // A-C s1 + // AAC s2 + // C: 1M1I1M + // + // ATC s1 + // A-C s2 + // C: 1M1D1M + std::string cigar; + +}; + +struct OverlapperParams +{ + int match_score; + int gap_penalty; + int mismatch_penalty; +}; + +struct ScoreParam +{ + + ScoreParam(int match, int mismatch, int gap, int gap_start = 0) : + match(match), mismatch(mismatch), gap(gap), gap_start(gap_start) { + + } + + int matchChar(char a, char b) const { + if (a == b) return match; + return mismatch; + } + + int match; + int mismatch; + int gap; + int gap_start; + +}; + +// Global variables +extern OverlapperParams default_params; // { 2, -5, -3 }; +extern OverlapperParams ungapped_params; // { 2, -10000, -3 }; +extern OverlapperParams svseq2_params; // { 1, -3, -1 }; + +// +namespace Overlapper +{ + +// Compute the highest-scoring overlap between s1 and s2. +// This is a naive O(M*N) algorithm with a linear gap penalty. 
+SequenceOverlap computeOverlap(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); +SequenceOverlap computeOverlapSG(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +SequenceOverlap alignSuffix(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); +SequenceOverlap alignPrefix(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +SequenceOverlap computeOverlapSW2(const std::string& s1, const std::string& s2, int minOverlap, double minIdentity, const OverlapperParams params = default_params); + +SequenceOverlap ageAlignPrefix(const std::string& s1, const std::string& s2, const ScoreParam& score_param); +SequenceOverlap ageAlignSuffix(const std::string& s1, const std::string& s2, const ScoreParam& score_param); + +// Extend a match between s1 and s2 into a full overlap using banded dynamic programming. +// start_1/start_2 give the starting positions of the current partial alignment. These coordinates +// are used to estimate where the overlap begins. 
The estimated alignment is refined by calculating +// the overlap with banded dynamic programming +SequenceOverlap extendMatch(const std::string& s1, const std::string& s2, int start_1, int start_2, int bandwidth); + +// Perform an alignment using affine gap penalties +SequenceOverlap computeOverlapAffine(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +// Compact an expanded CIGAR string into a regular cigar string +std::string compactCigar(const std::string& ecigar); + +} + +#endif diff --git a/clip.cpp b/clip.cpp new file mode 100644 index 0000000..05dd9c1 --- /dev/null +++ b/clip.cpp @@ -0,0 +1,620 @@ +#include "clip.h" +#include "error.h" +#include "Helper.h" +#include +#include +#include +#include + +using namespace std; +using namespace BamTools; + +AbstractClip::AbstractClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const vector& cigar) + : referenceId(referenceId), + mapPosition(mapPosition), + clipPosition(clipPosition), + matePosition(matePosition), + sequence(sequence), + cigar(cigar), + conflictFlag(false) { +} + +int AbstractClip::length() const { + return sequence.length(); +} + +int AbstractClip::leftmostPosition() const { + if (cigar[0].Type == 'S') return mapPosition - cigar[0].Length; + return mapPosition; +} + +AbstractClip::~AbstractClip() { +} + +Deletion AbstractClip::call(BamReader &reader, FaidxWrapper &faidx, int insLength, int minOverlap, double minIdentity, int minMapQual) +{ + string refName = Helper::getReferenceName(reader, referenceId); + + vector ranges; + fetchSpanningRanges(reader, insLength, ranges, minMapQual); +// vector sizes; +// fecthSizesForSpanningPairs(reader, insLength, sizes); + + if (ranges.empty()) error("No deletion is found"); + + vector regions; + toTargetRegions(refName, insLength, ranges, regions); + if (regions.empty()) error("No deletion is found."); // 3F/3R call() read regions[0]; stop when no target region was built + return call(faidx, regions, minOverlap, minIdentity); +} + +bool AbstractClip::hasConflictWith(AbstractClip
*other) { + if (getType() == other->getType()) return false; + return abs(clipPosition - other->clipPosition) < Helper::CONFLICT_THRESHOLD; +} + +bool AbstractClip::getConflictFlag() const +{ + return conflictFlag; +} + +void AbstractClip::setConflictFlag(bool value) +{ + conflictFlag = value; +} + +int AbstractClip::maxEditDistanceForSoftclippedPart() +{ + if (lengthOfSoftclippedPart() >= 20) return 2; + return 1; +} + + + +ForwardBClip::ForwardBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const vector& cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +/* +Deletion ForwardBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) { + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + if ((*it).length() < lengthOfSoftclippedPart()) continue; + string s1 = (*it).sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSG(s1, softclippedPart()); + if (overlap.edit_distance > maxEditDistanceForSoftclippedPart()) continue; + int leftEnd = (*it).start + overlap.match[0].end; + int offsetToRight = offsetFromThatEnd((*it).referenceName, faidx, leftEnd); + int rightEnd = clipPosition; + int offsetToLeft = offsetFromThisEnd((*it).referenceName, faidx); + int start1 = leftEnd - offsetToLeft; + int start2 = leftEnd + offsetToRight; + int end1 = rightEnd - offsetToLeft; + int end2 = rightEnd + offsetToRight; + int len = start1 - end1 + 1; + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} +*/ + +Deletion ForwardBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ +// error("No deletion is found."); + ScoreParam score_param(1, -1, 2, 4); + for (auto it = regions.begin(); it != regions.end(); ++it) { + string s1 = (*it).sequence(faidx); + 
reverse(s1.begin(), s1.end()); + string s2 = sequence; + reverse(s2.begin(), s2.end()); + + SequenceOverlap overlap; + + try { + overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); + } catch (ErrorException& ex) { + continue; + } + + for (size_t i = 0; i < 2; ++i) + overlap.match[i].flipStrand(overlap.length[i]); + +// overlap = Overlapper::alignPrefix(s1, s2, ungapped_params); + +// if (s2 == "GCCTACAGAGTGCAGAGCCAGCCCAGGACAGGGGACAATTACACAGGCGATGGTCCTAAGAACCGAACCTTCCAATCCCAAAACTCTAGACAGGTATCCAA") +// cout << s1 << endl; +// overlap = Overlapper::ageAlignPrefix(s1, s2, score_param); +// if (!overlap.isQualified(minOverlap, minIdentity)) +// continue; + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int rightBp = clipPosition + offset; + int leftBp = (*it).start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + +// int delta = overlap.match[1].length() - lengthOfSoftclippedPart(); +// int rightBp = clipPosition; +// // leftBp might need to be adjusted. +// int leftBp = (*it).start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp : leftBp + delta; + int start2 = delta > 0 ? leftBp + delta : leftBp; + int end1 = delta > 0 ? rightBp : rightBp + delta; + int end2 = delta > 0 ? 
rightBp + delta : rightBp; + +// if (start2 == 23483811) { +// cout << overlap << endl; +// cout << s2 << endl; +// cout << s1 << endl; +// } + + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} + +string ForwardBClip::getType() +{ + return "5F"; +} + +void ForwardBClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + // SVSeq2.length +// int start = leftmostPosition(); + int start = clipPosition; +// int end = start + insLength + length(); + int end = start + insLength - 2 * length(); + + if (start > end) error("the region is invalid."); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + BamAlignment al; + while(reader.GetNextAlignment(al)) { +// string xt; +// al.GetTag("XT", xt); +// xt = xt.substr(0,1); + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality >= minMapQual //&& xt == "U" + && al.Position > al.MatePosition && al.MatePosition + length() - Helper::SVLEN_THRESHOLD <= clipPosition) { + ranges.push_back({al.MatePosition + 1, al.Position + 1}); + } + } + +} + +void ForwardBClip::fecthSizesForSpanningPairs(BamReader &reader, int insLength, std::vector &sizes) +{ + int start = clipPosition; + int end = clipPosition + insLength + length(); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + vector > records; + BamAlignment al; + while(reader.GetNextAlignment(al)) { + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality > 0 && al.Position > al.MatePosition) { + records.push_back(make_pair(abs(al.InsertSize), al.Position - clipPosition)); + } + } + sort(records.begin(), records.end(), [](const pair& r1, const pair& r2){ return r1.first < r2.first; }); + cout << ">" << clipPosition 
<< "," << mapPosition << endl; + transform(records.begin(), records.end(), ostream_iterator(cout, " "), [](const pair& r){ + stringstream ss; + ss << "(" << r.first << "," << r.second << ")"; + return ss.str(); + }); + cout << endl; +} + +void ForwardBClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int rightmostPos = clipPosition + length(); + + std::vector newRanges(ranges.size()); +// transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.start, ran.start + insLength + length()}; return r; }); + transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.start, ran.start + insLength - length()}; return r; }); + std::vector idClusters; + clusterRanges(newRanges, idClusters); +// Replace with the merging method used by SVSeq2 +// sort(std::begin(newRanges), std::end(newRanges)); +// clusterRanges2(newRanges, idClusters); + for (auto &elt : idClusters) { + int s = newRanges[elt.front()].start; + if (s > rightmostPos) break; + int e = newRanges[elt.back()].end; + if (e > rightmostPos) e = rightmostPos; + if (s > e) break; + regions.push_back({referenceName, s, e}); + } +} + + +/* + +ForwardEClip::ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +void ForwardEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges) +{ + ranges.push_back({clipPosition, matePosition}); +} + +void ForwardEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int pe = ranges[0].end + length(); + int leftmostPos = clipPosition; + int len1 = length() - cigar[cigar.size() - 1].Length; + int cPrime = ranges[0].end + len1 - insLength; + if (cPrime < leftmostPos) cPrime = leftmostPos; + 
regions.push_back({referenceName, cPrime, pe}); +} + +Deletion ForwardEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSW(s1, sequence, minOverlap, minIdentity, ungapped_params); + if (overlap.getOverlapLength() >= minOverlap && + overlap.getPercentIdentity() >= minIdentity * 100) { + int rightBp = regions[0].start + overlap.match[0].start - 1; + int leftBp = (overlap.getOverlapLength() > cigar[cigar.size() - 1].Length) ? clipPosition - overlap.getOverlapLength() + cigar[cigar.size() - 1].Length + : clipPosition; + leftBp--; // left breakpoint refers the position of the last base prior to the clipped part conforming to the VCF format. + int len = leftBp - rightBp; + if (overlap.getOverlapLength() < cigar[cigar.size() - 1].Length) len += cigar[cigar.size() - 1].Length - overlap.getOverlapLength(); + if (len > Helper::SVLEN_THRESHOLD) error("No deletion was found."); + return Deletion(regions[0].referenceName, leftBp, leftBp, rightBp, rightBp, len); + } + error("No deletion was found."); +} +*/ + + +ReverseEClip::ReverseEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +void ReverseEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + // Experiment ID: SVSeq2.length + int start = clipPosition - insLength + length(); +// int end = leftmostPosition() + length(); +// int start = end - insLength - length(); + if (start < 0) start = 0; + int end = clipPosition - length(); + + if (start > end) error("the region is invalid."); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + BamAlignment al; + while(reader.GetNextAlignment(al)) { +// string xt; +// 
al.GetTag("XT", xt); +// xt = xt.substr(0,1); + if (al.Position < start - 1) continue; + if (!al.IsReverseStrand() && al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality >= minMapQual //&& xt == "U" + && al.Position < al.MatePosition && al.MatePosition >= clipPosition - Helper::SVLEN_THRESHOLD) { + ranges.push_back({al.Position + 1, al.MatePosition + 1}); + } + } + +} + +void ReverseEClip::fecthSizesForSpanningPairs(BamReader &reader, int insLength, std::vector &sizes) +{ + +} + +void ReverseEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int leftmostPos = clipPosition - length(); + + std::vector newRanges(ranges.size()); +// transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.end - insLength - length(), ran.end}; return r; }); + transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.end - insLength + 2 * length(), ran.end + length()}; return r; }); + std::vector idClusters; + clusterRanges(newRanges, idClusters); +// Replace with the merging method used by SVSeq2 +// sort(std::begin(newRanges), std::end(newRanges)); +// clusterRanges2(newRanges, idClusters); + for (auto &elt : idClusters) { + int e = newRanges[elt.back()].end; + if (e < leftmostPos) continue; + int s = newRanges[elt.front()].start; + if (s < leftmostPos) s = leftmostPos; + if (s > e) continue; + regions.push_back({referenceName, s, e}); + } + +} + +/* +Deletion ReverseEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) { + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + if ((*it).length() < lengthOfSoftclippedPart()) continue; + string s1 = (*it).sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSG(s1, softclippedPart()); + if (overlap.edit_distance > maxEditDistanceForSoftclippedPart()) continue; + int leftEnd = clipPosition - 1; + int 
offsetToRight = offsetFromThisEnd((*it).referenceName, faidx); + int rightEnd = (*it).start + overlap.match[0].start; + int offsetToLeft = offsetFromThatEnd((*it).referenceName, faidx, rightEnd); + int start1 = leftEnd - offsetToLeft; + int start2 = leftEnd + offsetToRight; + int end1 = rightEnd - offsetToLeft; + int end2 = rightEnd + offsetToRight; + int len = start1 - end1 + 1; + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} +*/ + +Deletion ReverseEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ +// error("No deletion is found."); + ScoreParam score_param(1, -1, 2, 4); + + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + string s1 = (*it).sequence(faidx); + string s2 = sequence; + SequenceOverlap overlap; + + try { + overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); + } catch (ErrorException& ex) { + continue; + } + +// overlap = Overlapper::alignSuffix(s1, s2, ungapped_params); +// overlap = Overlapper::ageAlignSuffix(s1, s2, score_param); +// if (!overlap.isQualified(minOverlap, minIdentity)) +// continue; + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int leftBp = clipPosition - 1 - offset; + int rightBp = (*it).start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + +// int delta = overlap.match[1].length() - lengthOfSoftclippedPart(); +// int leftBp = clipPosition - 1; +// // rightBp might need to be adjusted +// int rightBp = (*it).start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp - delta : leftBp; + int start2 = delta > 0 ? leftBp : leftBp - delta; + int end1 = delta > 0 ? 
rightBp - delta : rightBp; + int end2 = delta > 0 ? rightBp : rightBp - delta; + +// if (start2 == 54151129) { +// cout << overlap << endl; +// cout << s2 << endl; +// cout << s1 << endl; +// } + + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} + +string ReverseEClip::getType() +{ + return "5R"; +} + + +int ForwardBClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ + return numOfThelongestSuffix(softclippedPart(), + faidx.fetch(referenceName, + clipPosition - lengthOfSoftclippedPart(), + clipPosition - 1)); +} + +int ForwardBClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ + return numOfTheLongestPrefix(mappedPart(), + faidx.fetch(referenceName, + orignal + 1, + orignal + lengthOfMappedPart())); +} + +int ReverseEClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ + return numOfTheLongestPrefix(softclippedPart(), + faidx.fetch(referenceName, + clipPosition, + clipPosition + lengthOfSoftclippedPart() - 1)); +} + +int ReverseEClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ + return numOfThelongestSuffix(mappedPart(), + faidx.fetch(referenceName, + orignal - lengthOfMappedPart(), + orignal - 1)); +} + + +ReverseBClip::ReverseBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +string ReverseBClip::getType() +{ + return "3R"; +} + +Deletion ReverseBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + reverse(s1.begin(), s1.end()); + string s2 = sequence; + reverse(s2.begin(), s2.end()); + SequenceOverlap overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); 
+ + for (size_t i = 0; i < 2; ++i) + overlap.match[i].flipStrand(overlap.length[i]); + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int rightBp = clipPosition + offset; + int leftBp = regions[0].start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp : leftBp + delta; + int start2 = delta > 0 ? leftBp + delta : leftBp; + int end1 = delta > 0 ? rightBp : rightBp + delta; + int end2 = delta > 0 ? rightBp + delta : rightBp; + if (len > Helper::SVLEN_THRESHOLD) error("No deletion is found."); + return Deletion(regions[0].referenceName, start1, start2, end1, end2, len, getType()); + + error("No deletion is found."); +} + +void ReverseBClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + ranges.push_back({matePosition + 1, clipPosition + 1}); +} + +void ReverseBClip::fecthSizesForSpanningPairs(BamReader &reader, int inslength, std::vector &sizes) +{ +} + +void ReverseBClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + // ran.start, ran.start + insLength - length() + int rightmostPos = clipPosition + length(); + int s = ranges[0].start; + int e = s + insLength - length(); + if (e > rightmostPos) e = rightmostPos; + if (s > e) return; + regions.push_back({referenceName, s, e}); +} + +int ReverseBClip::lengthOfSoftclippedPart() +{ + return cigar[0].Length; +} + +string ReverseBClip::softclippedPart() +{ + return sequence.substr(0, lengthOfSoftclippedPart()); +} + +string ReverseBClip::mappedPart() +{ + return sequence.substr(lengthOfSoftclippedPart()); +} + +// Stub required by the AbstractClip interface; returns 0 so the result is defined (the body used to be empty). +int ReverseBClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ return 0; } + +int ReverseBClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int
orignal) +{ return 0; } // stub required by the AbstractClip interface; 0 keeps the result defined (the body used to be empty) + + +ForwardEClip::ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +string ForwardEClip::getType() +{ + return "3F"; +} + +Deletion ForwardEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSW2(s1, sequence, minOverlap, minIdentity, ungapped_params); + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int leftBp = clipPosition - 1 - offset; + int rightBp = regions[0].start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp - delta : leftBp; + int start2 = delta > 0 ? leftBp : leftBp - delta; + int end1 = delta > 0 ? rightBp - delta : rightBp; + int end2 = delta > 0 ?
rightBp : rightBp - delta; + + if (len <= Helper::SVLEN_THRESHOLD) { + return Deletion(regions[0].referenceName, start1, start2, end1, end2, len, getType()); + } + + error("No deletion is found."); +} + +void ForwardEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + ranges.push_back({clipPosition + 1, matePosition + 1}); +} + +void ForwardEClip::fecthSizesForSpanningPairs(BamReader &reader, int inslength, std::vector &sizes) +{ +} + +void ForwardEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + // ran.end - insLength + 2 * length(), ran.end + length() + int leftmostPos = clipPosition; + int s = ranges[0].end - insLength + 2 * length(); + if (s < leftmostPos) s = leftmostPos; + int e = ranges[0].end + length(); + if (s > e) return; + regions.push_back({referenceName, s, e}); +} + +int ForwardEClip::lengthOfSoftclippedPart() +{ + return cigar[cigar.size() - 1].Length; +} + +string ForwardEClip::softclippedPart() +{ + return sequence.substr(lengthOfMappedPart()); +} + +string ForwardEClip::mappedPart() +{ + return sequence.substr(0, lengthOfMappedPart()); +} + +// Stub required by the AbstractClip interface; returns 0 so the result is defined (the body used to be empty). +int ForwardEClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ return 0; } + +// Stub required by the AbstractClip interface; returns 0 so the result is defined (the body used to be empty). +int ForwardEClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ return 0; } diff --git a/clip.h b/clip.h new file mode 100644 index 0000000..5af136e --- /dev/null +++ b/clip.h @@ -0,0 +1,191 @@ +#ifndef CLIP_H +#define CLIP_H + +#include "api/BamAux.h" +#include "api/BamReader.h" +#include "Deletion.h" +#include "FaidxWrapper.h" +#include "range.h" +#include "Thirdparty/overlapper.h" + +#include +#include +#include + +struct TargetRegion +{ + std::string referenceName; + int start; + int end; + + std::string sequence(FaidxWrapper &faidx) const { + return faidx.fetch(referenceName, start, end); + } + + int length() const { + return end - start + 1; + } +}; + + +class AbstractClip { +public: +
AbstractClip(int referenceId, int mapPosition, int clipPosition, + int matePosition, const std::string& sequence, + const std::vector& cigar); + + int length() const; + + int leftmostPosition() const; + int getClipPosition() const { + return clipPosition; + } + + virtual ~AbstractClip(); + + Deletion call(BamTools::BamReader& reader, FaidxWrapper &faidx, int insLength, int minOverlap, double minIdentity, int minMapQual); + + bool hasConflictWith(AbstractClip *other); + virtual std::string getType() = 0; + bool getConflictFlag() const; + void setConflictFlag(bool value); + std::string toString() { + std::stringstream ss; + ss << getClipPosition() << "\t" << getType(); + return ss.str(); + } + +protected: + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity) = 0; + virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual) = 0; + virtual void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector& sizes) = 0; + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) = 0; + + virtual int lengthOfSoftclippedPart() = 0; + int lengthOfMappedPart() { + return sequence.size() - lengthOfSoftclippedPart(); + } + + int maxEditDistanceForSoftclippedPart(); + + virtual std::string softclippedPart() = 0; + virtual std::string mappedPart() = 0; + virtual int offsetFromThisEnd(std::string referenceName, FaidxWrapper& faidx) = 0; + virtual int offsetFromThatEnd(std::string referenceName, FaidxWrapper& faidx, int orignal) = 0; + + int referenceId; + int mapPosition; + int clipPosition; + int matePosition; + std::string sequence; + std::vector cigar; + + bool conflictFlag; +}; + +class ForwardBClip : public AbstractClip { +public: + ForwardBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + +private: + 
virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + virtual void fecthSizesForSpanningPairs(BamTools::BamReader& reader, int insLength, std::vector& sizes); + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity); + + // AbstractClip interface +public: + std::string getType(); + + // AbstractClip interface +protected: + int lengthOfSoftclippedPart() { + return cigar[0].Length; + } + + std::string softclippedPart() { + return sequence.substr(0, lengthOfSoftclippedPart()); + } + + std::string mappedPart() { + return sequence.substr(lengthOfSoftclippedPart()); + } + + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ReverseBClip : public AbstractClip { + + // AbstractClip interface +public: + ReverseBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + std::string getType(); + +protected: + Deletion call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity); + void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector &sizes); + void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + int lengthOfSoftclippedPart(); + std::string softclippedPart(); + std::string mappedPart(); + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ForwardEClip : public AbstractClip { + + // AbstractClip interface 
+public: + ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + std::string getType(); + +protected: + Deletion call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity); + void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector &sizes); + void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + int lengthOfSoftclippedPart(); + std::string softclippedPart(); + std::string mappedPart(); + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ReverseEClip : public AbstractClip { +public: + ReverseEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + +private: + virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + virtual void fecthSizesForSpanningPairs(BamTools::BamReader& reader, int insLength, std::vector& sizes); + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity); + + // AbstractClip interface +public: + std::string getType(); + + // AbstractClip interface +protected: + int lengthOfSoftclippedPart() { + return cigar[cigar.size() - 1].Length; + } + + std::string softclippedPart() { + return sequence.substr(lengthOfMappedPart()); + } + + std::string mappedPart() { + return sequence.substr(0, lengthOfMappedPart()); + } + + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int 
offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +#endif // CLIP_H diff --git a/easylogging++.h b/easylogging++.h new file mode 100644 index 0000000..2c4e893 --- /dev/null +++ b/easylogging++.h @@ -0,0 +1,4003 @@ +/////////////////////////////////////////////////////////////////////////////////// +// // +// easylogging++.h - Core of EasyLogging++ // +// // +// EasyLogging++ v8.91 // +// Cross platform logging made easy for C++ applications // +// Author Majid Khan // +// http://www.icplusplus.com/tools/easylogging // +// https://github.com/mkhan3189/EasyLoggingPP // +// // +// Copyright (c) 2012-2013 Majid Khan // +// // +// This software is provided 'as-is', without any express or implied // +// warranty. In no event will the authors be held liable for any damages // +// arising from the use of this software. // +// // +// Permission is granted to anyone to use this software for any purpose, // +// including commercial applications, and to alter it and redistribute // +// it freely, subject to the following restrictions: // +// // +// 1. The origin of this software must not be misrepresented; you must // +// not claim that you wrote the original software. If you use this // +// software in a product, an acknowledgment in the product documentation // +// would be appreciated but is not required. // +// // +// 2. Altered source versions must be plainly marked as such, and must // +// not be misrepresented as being the original software. // +// // +// 3. This notice may not be removed or altered from any source // +// distribution // +// // +// PLEASE NOTE: THIS FILE MAY BE CHANGED. 
TO GET ORIGINAL VERSION // +// EITHER DOWNLOAD IT FROM http://www.icplusplus.com/tools/easylogging/ // +// OR PULL IT FROM https://github.com/mkhan3189/EasyLoggingPP (master branch) // +// // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef EASYLOGGINGPP_H +#define EASYLOGGINGPP_H +// +// Log location macros +// +#if !defined(__FILE__) +# define __FILE__ "" +#endif // !defined(__FILE__) +#if !defined(__LINE__) +# define __LINE__ 0 +#endif // !defined(__LINE__) +// Appropriate function macro +#if defined(__func__) +# undef __func__ +#endif // defined(__func__) +#if defined(_MSC_VER) && (_MSC_VER >= 1020) +# define __func__ __FUNCSIG__ +#elif defined(__GNUC__) && (__GNUC__ >= 2) +# define __func__ __PRETTY_FUNCTION__ +#elif defined(__clang__) && (__clang__ == 1) +# define __func__ __PRETTY_FUNCTION__ +#else +# define __func__ "" +#endif // defined(_MSC_VER) && (_MSC_VER >= 1020) +// +// Compiler evaluation +// http://isocpp.org/blog/2013/05/gcc-4.8.1-released-c11-feature-complete +// http://msdn.microsoft.com/en-us/library/vstudio/hh567368.aspx +// +// GNU +#if defined(__GNUC__) +# define _ELPP_GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) +# if defined(__GXX_EXPERIMENTAL_CXX0X__) +# define _ELPP_CXX0X 1 +# elif (_ELPP_GCC_VERSION >= 40801) +# define _ELPP_CXX11 1 +# endif // defined(__GXX_EXPERIMENTAL_CXX0X__) +#endif // defined(__GNUC__) +// VC++ +#if defined(_MSC_VER) +# if (_MSC_VER >= 1400) // VC++ 8.0 +# define _ELPP_CRT_DBG_WARNINGS 1 +# else +# define _ELPP_CRT_DBG_WARNINGS 0 +# endif // (_MSC_VER >= 1400) +# if (_MSC_VER == 1600) +# define _ELPP_CXX0X 1 +# elif (_MSC_VER == 1700) +# define _ELPP_CXX11 1 +# endif // (_MSC_VER == 1600) +#else +# define _ELPP_CRT_DBG_WARNINGS 0 +#endif // defined(_MSC_VER) +// Clang +#if defined(__clang__) && (__clang__ == 1) +# define _ELPP_CLANG_VERSION (__clang_major__ * 10000 \ + + __clang_minor__ * 100 \ + + __clang_patchlevel__) +# if 
(_ELPP_CLANG_VERSION >= 30300) +# define _ELPP_CXX11 1 +# endif // (_ELPP_CLANG_VERSION >= 30300) +#endif // defined(__clang__) && (__clang__ == 1) +// MinGW +#if defined(__MINGW32__) || defined(__MINGW64__) +# define _ELPP_MINGW 1 +#else +# define _ELPP_MINGW 0 +#endif // defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__ANDROID__) +# define _ELPP_NDK 1 +#else +# define _ELPP_NDK 0 +#endif // defined(__ANDROID__) +// Some special functions that are special for VC++ +// This is to prevent CRT security warnings and to override deprecated methods but at the same time +// MinGW does not support some functions, so we need to make sure that proper function is used. +#if _ELPP_CRT_DBG_WARNINGS +# define SPRINTF sprintf_s +# define STRTOK(a,b,c) strtok_s(a,b,c) +#else +# define SPRINTF sprintf +# define STRTOK(a,b,c) strtok(a,b) +#endif +// std::thread availablity +#if defined(__GNUC__) && (!_ELPP_NDK) && (_ELPP_CXX0X || _ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#elif defined(_MSC_VER) && (!_ELPP_NDK) && (_ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#elif defined(__clang__) && (!_ELPP_NDK) && (__clang__ == 1) && (_ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#else +# define _ELPP_STD_THREAD_AVAILABLE 0 +#endif // defined(__GNUC__) && (_ELPP_CXX0X || _ELPP_CXX11) +// Qt +#if defined(QT_CORE_LIB) +# if (defined(QT_VERSION) && QT_VERSION >= QT_VERSION_CHECK(5, 0, 0)) +# define _ELPP_QT_5 1 +# else +# define _ELPP_QT_5 0 +# endif // (defined(QT_VERSION) && QT_VERSION >= QT_VERSION_CHECK(5, 0, 0)) +#endif // defined(QT_CORE_LIB) +// +// High-level log evaluation +// +#if (defined(_DISABLE_LOGS)) +# define _ENABLE_EASYLOGGING 0 +#else +# define _ENABLE_EASYLOGGING 1 +#endif // (!defined(_DISABLE_LOGS)) +// +// OS evaluation +// +// Windows +#if defined(_WIN32) || defined(_WIN64) +# define _ELPP_OS_WINDOWS 1 +#else +# define _ELPP_OS_WINDOWS 0 +#endif // defined(_WIN32) || defined(_WIN64) +// Linux +#if (defined(__linux) || defined(__linux__)) 
+# define _ELPP_OS_LINUX 1 +#else +# define _ELPP_OS_LINUX 0 +#endif // (defined(__linux) || defined(__linux__)) +// Mac +#if defined(__APPLE__) +# define _ELPP_OS_MAC 1 +#else +# define _ELPP_OS_MAC 0 +#endif // defined(__APPLE__) +// Unix +#define _ELPP_OS_UNIX ((_ELPP_OS_LINUX || _ELPP_OS_MAC) && (!_ELPP_OS_WINDOWS)) +// Assembly +#if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && (defined(_M_IX86))) +# define _ELPP_ASSEMBLY_SUPPORTED 1 +#else +# define _ELPP_ASSEMBLY_SUPPORTED 0 +#endif +#if (!defined(_DISABLE_ELPP_ASSERT)) +# if (defined(_STOP_ON_FIRST_ELPP_ASSERTION)) +# define __EASYLOGGINGPP_ASSERT(expr, msg) if (!(expr)) { std::cerr << "EASYLOGGING++ ASSERTION FAILED (LINE: " << __LINE__ << ") [" #expr << "] with message \"" << msg << "\"" << std::endl; exit(1); } +# else +# define __EASYLOGGINGPP_ASSERT(expr, msg) if (!(expr)) { std::cerr << "EASYLOGGING++ ASSERTION FAILED (LINE: " << __LINE__ << ") [" #expr << "] with message \"" << msg << "\"" << std::endl; } +# endif // (defined(_STOP_ON_FIRST_ELPP_ASSERTION)) +#else +# define __EASYLOGGINGPP_ASSERT(x, y) +#endif // (!defined(_DISABLE_ELPP_ASSERT)) +#define __EASYLOGGINGPP_SUPPRESS_UNSED(x) (void)x; +#if _ELPP_OS_UNIX +// Log file permissions for unix-based systems +# define _LOG_PERMS S_IRUSR | S_IWUSR | S_IXUSR | S_IWGRP | S_IRGRP | S_IXGRP | S_IWOTH | S_IXOTH +#endif // _ELPP_OS_UNIX +#if (!defined(_DISABLE_MUTEX) && (_ENABLE_EASYLOGGING)) +# define _ELPP_ENABLE_MUTEX 1 +#else +# define _ELPP_ENABLE_MUTEX 0 +#endif // (!defined(_DISABLE_MUTEX) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_DEBUG_LOGS) && (_ENABLE_EASYLOGGING) && ((defined(_DEBUG)) || (!defined(NDEBUG)))) +# define _ELPP_DEBUG_LOG 1 +#else +# define _ELPP_DEBUG_LOG 0 +#endif // (!defined(_DISABLE_DEBUG_LOGS) && (_ENABLE_EASYLOGGING) && ((defined(_DEBUG)) || (!defined(NDEBUG)))) +#if (!defined(_DISABLE_INFO_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_INFO_LOG 1 +#else +# define 
_ELPP_INFO_LOG 0 +#endif // (!defined(_DISABLE_INFO_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_WARNING_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_WARNING_LOG 1 +#else +# define _ELPP_WARNING_LOG 0 +#endif // (!defined(_DISABLE_WARNING_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_ERROR_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_ERROR_LOG 1 +#else +# define _ELPP_ERROR_LOG 0 +#endif // (!defined(_DISABLE_ERROR_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_FATAL_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_FATAL_LOG 1 +#else +# define _ELPP_FATAL_LOG 0 +#endif // (!defined(_DISABLE_FATAL_LOGS) && (_ENABLE_EASYLOGGING)) +#if (defined(_QUALITY_ASSURANCE) && (_ENABLE_EASYLOGGING)) +# define _ELPP_QA_LOG 1 +#else +# define _ELPP_QA_LOG 0 +#endif // (defined(_QUALITY_ASSURANCE) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_TRACE_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_TRACE_LOG 1 +#else +# define _ELPP_TRACE_LOG 0 +#endif // (!defined(_DISABLE_TRACE_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_VERBOSE_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_VERBOSE_LOG 1 +#else +# define _ELPP_VERBOSE_LOG 0 +#endif // (!defined(_DISABLE_VERBOSE_LOGS) && (_ENABLE_EASYLOGGING)) +#define ELPP_FOR_EACH(variableName, initialValue, operation, limit) unsigned int variableName = initialValue; \ + do { \ + operation \ + variableName = variableName << 1; \ + if (variableName == 0) { ++variableName; } \ + } while (variableName <= limit) +#define ELPP_FOR_EACH_LEVEL(variableName, initialValue, operation) \ + ELPP_FOR_EACH(variableName, initialValue, operation, easyloggingpp::Level::kMaxValid) +#define ELPP_FOR_EACH_CONFIGURATION(variableName, initialValue, operation) \ + ELPP_FOR_EACH(variableName, initialValue, operation, easyloggingpp::ConfigurationType::kMaxValid) +// Includes +#include +#include +#include +#include +#include +#if _ELPP_NDK +# include +#endif // _ELPP_NDK +#if _ELPP_OS_UNIX +# include +# include +# if 
(_ELPP_ENABLE_MUTEX) +# if (_ELPP_ASSEMBLY_SUPPORTED) +# include +# else +# include +# endif // (_ELPP_ASSEMBLY_SUPPORTED) +# endif // (_ELPP_ENABLE_MUTEX) +#elif _ELPP_OS_WINDOWS +# include +# include +#endif // _ELPP_OS_UNIX +#include +#include +#include +#include +#include +#include +#include +#include +#if (_ELPP_STD_THREAD_AVAILABLE) +# include +#endif // _ELPP_STD_THREAD_AVAILABLE +#if defined(_ELPP_STL_LOGGING) +// For logging STL based templates +# include +# include +# include +# include +# include +# include +# include +#endif // defined(_ELPP_STL_LOGGING) +#if defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) +// For logging Qt based classes & templates +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif // defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) +namespace easyloggingpp { +namespace internal { + +class NoCopy { +protected: + NoCopy(void) {} +private: + NoCopy(const NoCopy&); + NoCopy& operator=(const NoCopy&); +}; + +class StaticClass { +private: + StaticClass(void); + StaticClass(const StaticClass&); + StaticClass& operator=(const StaticClass&); +}; +} // namespace internal + +struct Level : private internal::StaticClass { +public: + enum { + All = 0, Debug = 1, Info = 2, Warning = 4, Error = 8, + Fatal = 16, Verbose = 32, QA = 64, Trace = 128, Unknown = 1010 + }; + + static const unsigned int kMinValid = All; + static const unsigned int kMaxValid = Trace; + + static std::string convertToString(unsigned int level_) { + switch (level_) { + case All: + return std::string("ALL"); + case Debug: + return std::string("DEBUG"); + case Info: + return std::string("INFO"); + case Warning: + return std::string("WARNING"); + case Error: + return std::string("ERROR"); + case Fatal: + return std::string("FATAL"); + case QA: + return std::string("QA"); + case Verbose: + return std::string("VERBOSE"); + case Trace: + return std::string("TRACE"); + default: + return 
std::string("UNKNOWN"); + } + } + + static unsigned int convertFromString(const std::string& levelStr) { + if (levelStr == "all" || levelStr == "ALL") return Level::All; + if (levelStr == "debug" || levelStr == "DEBUG") return Level::Debug; + if (levelStr == "info" || levelStr == "INFO") return Level::Info; + if (levelStr == "warning" || levelStr == "WARNING") return Level::Warning; + if (levelStr == "error" || levelStr == "ERROR") return Level::Error; + if (levelStr == "fatal" || levelStr == "FATAL") return Level::Fatal; + if (levelStr == "qa" || levelStr == "QA") return Level::QA; + if (levelStr == "verbose" || levelStr == "VERBOSE") return Level::Verbose; + if (levelStr == "trace" || levelStr == "TRACE") return Level::Trace; + return Level::Unknown; + } +}; + +struct ConfigurationType : private internal::StaticClass { +public: + enum { + Enabled = 0, ToFile = 1, ToStandardOutput = 2, Format = 4, Filename = 8, + MillisecondsWidth = 16, PerformanceTracking = 32, RollOutSize = 64, Unknown = 1010 + }; + + static const unsigned int kMinValid = Enabled; + static const unsigned int kMaxValid = RollOutSize; + + static std::string convertToString(unsigned int configurationType_) { + switch (configurationType_) { + case Enabled: + return std::string("ENABLED"); + case Filename: + return std::string("FILENAME"); + case Format: + return std::string("FORMAT"); + case ToFile: + return std::string("TO_FILE"); + case ToStandardOutput: + return std::string("TO_STANDARD_OUTPUT"); + case MillisecondsWidth: + return std::string("MILLISECONDS_WIDTH"); + case PerformanceTracking: + return std::string("PERFORMANCE_TRACKING"); + case RollOutSize: + return std::string("ROLL_OUT_SIZE"); + default: return std::string("UNKNOWN"); + } + } + + static unsigned int convertFromString(const std::string& configStr) { + if (configStr == "enabled" || configStr == "ENABLED") return ConfigurationType::Enabled; + if (configStr == "to_file" || configStr == "TO_FILE") return ConfigurationType::ToFile; + 
if (configStr == "to_standard_output" || configStr == "TO_STANDARD_OUTPUT") return ConfigurationType::ToStandardOutput; + if (configStr == "format" || configStr == "FORMAT") return ConfigurationType::Format; + if (configStr == "filename" || configStr == "FILENAME") return ConfigurationType::Filename; + if (configStr == "milliseconds_width" || configStr == "MILLISECONDS_WIDTH") return ConfigurationType::MillisecondsWidth; + if (configStr == "performance_tracking" || configStr == "PERFORMANCE_TRACKING") return ConfigurationType::PerformanceTracking; + if (configStr == "roll_out_size" || configStr == "ROLL_OUT_SIZE") return ConfigurationType::RollOutSize; + return ConfigurationType::Unknown; + } +}; + +namespace internal { +struct Aspect : private internal::StaticClass { +public: + enum { + Normal = 0, Conditional = 1, Interval = 2 + }; +}; + +//! +//! Used internally. You should not need this class. +//! +class Constants : private internal::NoCopy { +public: + Constants (void) : + // + // Log level name outputs + // + LOG_INFO_LEVEL_VALUE ("INFO") , + LOG_DEBUG_LEVEL_VALUE ("DEBUG"), + LOG_WARNING_LEVEL_VALUE("WARN"), + LOG_ERROR_LEVEL_VALUE ("ERROR"), + LOG_FATAL_LEVEL_VALUE ("FATAL"), + LOG_VERBOSE_LEVEL_VALUE("VER"), + LOG_QA_LEVEL_VALUE ("QA"), + LOG_TRACE_LEVEL_VALUE ("TRACE"), + // + // Format specifiers + // + APP_NAME_FORMAT_SPECIFIER ("%app"), + LOGGER_ID_FORMAT_SPECIFIER ("%logger"), + THREAD_ID_FORMAT_SPECIFIER ("%thread"), + LEVEL_FORMAT_SPECIFIER ("%level"), + DATE_ONLY_FORMAT_SPECIFIER ("%date"), + TIME_ONLY_FORMAT_SPECIFIER ("%time"), + DATE_TIME_FORMAT_SPECIFIER ("%datetime"), + LOCATION_FORMAT_SPECIFIER ("%loc"), + FUNCTION_FORMAT_SPECIFIER ("%func"), + USER_FORMAT_SPECIFIER ("%user"), + HOST_FORMAT_SPECIFIER ("%host"), + LOG_MESSAGE_FORMAT_SPECIFIER ("%log"), + VERBOSE_LEVEL_FORMAT_SPECIFIER ("%vlevel"), + // + // Others + // + NULL_POINTER ("nullptr"), + FORMAT_SPECIFIER_ESCAPE_CHAR ('E'), + MAX_LOG_PER_CONTAINER (100), + MAX_LOG_PER_COUNTER 
(100000), + DEFAULT_MILLISECOND_OFFSET (1000), + MAX_VERBOSE_LEVEL (9), + CURRENT_VERBOSE_LEVEL (0), // Set dynamically from registeredLoggers +#if _ELPP_OS_UNIX + PATH_SLASH ("/"), +#elif _ELPP_OS_WINDOWS + PATH_SLASH ("\\"), +#endif // _ELPP_OS_UNIX, + DEFAULT_LOG_FILENAME ("myeasylog.log") + { + // Trivial logger configuration - only to set format (difference: not using %logger) + std::stringstream ss; + ss << " * ALL:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* DEBUG:\n"; + ss << " FORMAT = %datetime %level [%user@%host] [%func] [%loc] %log\n"; + // INFO and WARNING uses is defined by ALL + ss << "* ERROR:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* FATAL:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* QA:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* VERBOSE:\n"; + ss << " FORMAT = %datetime %level-%vlevel %log\n"; + ss << "* TRACE:\n"; + ss << " FORMAT = %datetime %level [%func] [%loc] %log\n"; + DEFAULT_LOGGER_CONFIGURATION = ss.str(); + } // C'tor + // + // Log level name outputs + // + const std::string LOG_INFO_LEVEL_VALUE; + const std::string LOG_DEBUG_LEVEL_VALUE; + const std::string LOG_WARNING_LEVEL_VALUE; + const std::string LOG_ERROR_LEVEL_VALUE; + const std::string LOG_FATAL_LEVEL_VALUE; + const std::string LOG_VERBOSE_LEVEL_VALUE; + const std::string LOG_QA_LEVEL_VALUE; + const std::string LOG_TRACE_LEVEL_VALUE; + // + // Format specifiers + // + const std::string APP_NAME_FORMAT_SPECIFIER; + const std::string LOGGER_ID_FORMAT_SPECIFIER; + const std::string THREAD_ID_FORMAT_SPECIFIER; + const std::string LEVEL_FORMAT_SPECIFIER; + const std::string DATE_ONLY_FORMAT_SPECIFIER; + const std::string TIME_ONLY_FORMAT_SPECIFIER; + const std::string DATE_TIME_FORMAT_SPECIFIER; + const std::string LOCATION_FORMAT_SPECIFIER; + const std::string FUNCTION_FORMAT_SPECIFIER; + const std::string USER_FORMAT_SPECIFIER; + const std::string HOST_FORMAT_SPECIFIER; + const std::string 
LOG_MESSAGE_FORMAT_SPECIFIER; + const std::string VERBOSE_LEVEL_FORMAT_SPECIFIER; + // + // Others + // + const std::string NULL_POINTER; + const char FORMAT_SPECIFIER_ESCAPE_CHAR; + const unsigned int MAX_LOG_PER_CONTAINER; + const unsigned int MAX_LOG_PER_COUNTER; + const unsigned int DEFAULT_MILLISECOND_OFFSET; + const int MAX_VERBOSE_LEVEL; + int CURRENT_VERBOSE_LEVEL; + const std::string PATH_SLASH; + const std::string DEFAULT_LOG_FILENAME; + std::string DEFAULT_LOGGER_CONFIGURATION; + + enum kFormatFlags { + kDateOnly = 2, + kTimeOnly = 4, + kDateTime = 8, + kLoggerId = 16, + kLocation = 32, + kFunction = 64, + kUser = 128, + kHost = 256, + kLogMessage = 512, + kVerboseLevel = 1024, + kAppName = 2048, + kThreadId = 4096 + }; +}; // class Constants +namespace threading { + +//! +//! To take care of shared resources in multi-threaded application. Used internally, you should not need it. +//! +class Mutex { +public: +#if _ELPP_ASSEMBLY_SUPPORTED +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +# define _ELPP_MUTEX_LOCK_GNU_ASM(lf_, old_) "movl $1,%%eax\n" \ + "\txchg %%eax,%0\n" \ + "\tmovl %%eax,%1\n" \ + "\t" : "=m" (lf_), "=m" (old_) : : "%eax", "memory" +# define _ELPP_MUTEX_UNLOCK_GNU_ASM(lf_) "movl $0,%%eax\n" \ + "\txchg %%eax,%0\n" \ + "\t" : "=m" (lf_) : : "%eax", "memory" +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + Mutex(void) : lockerFlag_(0) { + } +#else + Mutex(void) { +# if _ELPP_OS_UNIX + pthread_mutex_init(&underlyingMutex_, NULL); +# elif _ELPP_OS_WINDOWS + InitializeCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX + } + + virtual ~Mutex(void) { +# if _ELPP_OS_UNIX + pthread_mutex_destroy(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + DeleteCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX + } +#endif // _ELPP_ASSEMBLY_SUPPORTED + + inline void lock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED + bool locked = false; + while (!locked) { + locked = tryLock(); + if (!locked) 
{ +# if _ELPP_OS_UNIX + sched_yield(); +# elif _ELPP_OS_WINDOWS + Sleep(0); +# endif + } + } +#else +# if _ELPP_OS_UNIX + pthread_mutex_lock(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + EnterCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } + + inline bool tryLock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED + int oldLock_; +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile (_ELPP_MUTEX_LOCK_GNU_ASM(lockerFlag_, oldLock_)); +# elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &lockerFlag_; + __asm { + mov eax,1 + mov ecx,ptrLock + xchg eax,[ecx] + mov oldLock_,eax + } +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + return (oldLock_ == 0); +#else +# if _ELPP_OS_UNIX + return (pthread_mutex_trylock(&underlyingMutex_) == 0) ? true : false; +# elif _ELPP_OS_WINDOWS + return TryEnterCriticalSection(&underlyingMutex_) ? true : false; +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } + + inline void unlock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile (_ELPP_MUTEX_UNLOCK_GNU_ASM(lockerFlag_)); +# elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &lockerFlag_; + __asm { + mov eax,0 + mov ecx,ptrLock + xchg eax,[ecx] + } +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#else +# if _ELPP_OS_UNIX + pthread_mutex_unlock(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + LeaveCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } +private: +#if _ELPP_ASSEMBLY_SUPPORTED + int lockerFlag_; +#else +# if _ELPP_OS_UNIX + pthread_mutex_t underlyingMutex_; +# elif _ELPP_OS_WINDOWS + CRITICAL_SECTION underlyingMutex_; +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED +}; // class Mutex +//! +//! 
Scoped mutex that works same as C++11 std::lock_guard. Used internally, you should not use it. +//! +class ScopedLock : private internal::NoCopy { +public: + explicit ScopedLock(Mutex& m_) { + mutex_ = &m_; + mutex_->lock(); + } + + virtual ~ScopedLock(void) { + mutex_->unlock(); + } +private: + Mutex* mutex_; + ScopedLock(void); +}; // class ScopedLock + +//! +//! \return ID of current thread. If std::thread is available it uses get_id() otherwise if on windows it uses +//! GetCurrentThreadId() otherwise empty string. Used internally, you should not use it. +//! +inline std::string getCurrentThreadId(void) { + std::stringstream ss; +#if (_ELPP_STD_THREAD_AVAILABLE) + ss << std::this_thread::get_id(); +#else +# if (_ELPP_OS_WINDOWS) + ss << GetCurrentThreadId(); +# endif // (_ELPP_OS_WINDOWS) +#endif + return ss.str(); +} + +} // namespace threading +namespace utilities { + +template +inline void safeDelete(T*& pointer, bool checkNullity = true) { + if (checkNullity && pointer == NULL) return; + delete pointer; + pointer = NULL; +} + +//! +//! String utilities class used internally. You should not use it. +//! 
+class StringUtils : private internal::StaticClass { +public: + static inline std::string trim(const std::string &str) { + std::size_t s = str.find_first_not_of(" \n\r\t"); + std::size_t e = str.find_last_not_of(" \n\r\t"); + if ((s == std::string::npos) || (e == std::string::npos)) { + return ""; + } + else { + return str.substr(s, e - s + 1); + } + } + + static inline bool startsWith(const std::string& str, const std::string& start) { + return (str.length() >= start.length()) && (str.compare(0, start.length(), start) == 0); + } + + static inline bool endsWith(const std::string& str, const std::string& end) { + return (str.length() >= end.length()) && (str.compare(str.length() - end.length(), end.length(), end) == 0); + } + + static inline std::vector& split(const std::string& s, char delim, std::vector& elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; + } + + static inline std::string replaceAll(const std::string& str, const std::string& replaceWhat, const std::string& replaceWith) { + if (replaceWhat == replaceWith) + return str; + std::string result = str; + std::size_t foundAt = std::string::npos; + while ((foundAt = result.find(replaceWhat)) != std::string::npos) { + result.replace(foundAt, replaceWhat.length(), replaceWith); + } + return result; + } + + static inline std::string stripAllWhiteSpaces(const std::string& str) { + std::string result = replaceAll(str, " ", ""); + result = replaceAll(result, "\n", ""); + result = replaceAll(result, "\r", ""); + result = replaceAll(result, "\t", ""); + return result; + } + + static inline void tolower(std::string& str) { + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + } +}; + +//! +//! Operating System utilities class used internally. You should not use it. +//! 
+class OSUtils : private internal::StaticClass { +public: +#if _ELPP_OS_WINDOWS + static const char* getWindowsEnvironmentVariable(const char* variableName) { + const DWORD bufferLen = 50; + static char buffer[bufferLen]; + if (GetEnvironmentVariableA(variableName, buffer, bufferLen)) { + return buffer; + } + return NULL; + } +#endif // _ELPP_OS_WINDOWS +#if _ELPP_NDK + static std::string getProperty(const char* prop) { + char propVal[PROP_VALUE_MAX + 1]; + __system_property_get(prop, propVal); + return std::string(propVal); + } + + static std::string getDeviceName(void) { + std::stringstream ss; + std::string manufacturer = getProperty("ro.product.manufacturer"); + std::string model = getProperty("ro.product.model"); + if (manufacturer.empty() && model.empty()) { + return std::string(); + } + ss << manufacturer << " " << model; + return ss.str(); + } +#endif // _ELPP_NDK + // Runs command on terminal and returns the output. + // This is applicable only on linux and mac, for all other OS, an empty string is returned. 
+ static const std::string getBashOutput(const char* command_) { + if (command_ == NULL) { + return std::string(); + } +#if _ELPP_OS_UNIX && !_ELPP_NDK + FILE* proc = NULL; + if ((proc = popen(command_, "r")) == NULL) { + std::cerr << "\nUnable to run command [" << command_ << "]" << std::endl; + return std::string(); + } + char hBuff[4096]; + if (fgets(hBuff, sizeof(hBuff), proc) != NULL) { + pclose(proc); + if (hBuff[strlen(hBuff) - 1] == '\n') { + hBuff[strlen(hBuff) - 1] = '\0'; + } + return std::string(hBuff); + } + return std::string(); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + static std::string getEnvironmentVariable(const char* variableName, const char* defaultVal, const char* alternativeBashCommand = NULL) { +#if _ELPP_OS_UNIX + const char* val = getenv(variableName); +#elif _ELPP_OS_WINDOWS + const char* val = getWindowsEnvironmentVariable(variableName); +#endif // _ELPP_OS_UNIX + if ((val == NULL) || ((strcmp(val, "") == 0))) { +#if _ELPP_OS_UNIX + // Try harder on unix-based systems + std::string valBash = internal::utilities::OSUtils::getBashOutput(alternativeBashCommand); + if (valBash.empty()) { + return std::string(defaultVal); + } else { + return valBash; + } +#elif _ELPP_OS_WINDOWS + return std::string(defaultVal); +#endif // _ELPP_OS_WINDOWS + } + return std::string(val); + } + + // Gets current username. + static const std::string currentUser(void) { +#if _ELPP_OS_UNIX && !_ELPP_NDK + return getEnvironmentVariable("USER", "user", "whoami"); +#elif _ELPP_OS_WINDOWS + return getEnvironmentVariable("USERNAME", "user"); +#elif _ELPP_NDK + return std::string("android"); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + // Gets current host name or computer name. 
+ static const std::string currentHost(void) { +#if _ELPP_OS_UNIX && !_ELPP_NDK + return getEnvironmentVariable("HOSTNAME", "unknown-host", "hostname"); +#elif _ELPP_OS_WINDOWS + return getEnvironmentVariable("COMPUTERNAME", "unknown-host"); +#elif _ELPP_NDK + return getDeviceName(); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + // Determines whether or not provided path_ exist in current file system + static inline bool pathExists(const char* path_) { + if (path_ == NULL) { + return false; + } +#if _ELPP_OS_UNIX + struct stat st; + return (stat(path_, &st) == 0); +#elif _ELPP_OS_WINDOWS + DWORD fileType = GetFileAttributesA(path_); + if (fileType == INVALID_FILE_ATTRIBUTES) { + return false; + } + return (fileType & FILE_ATTRIBUTE_DIRECTORY) == 0 ? false : true; +#endif // _ELPP_OS_UNIX + } + + // Creates path as specified + static bool createPath(const std::string& path_) { + if (path_.empty()) { + return false; + } + if (internal::utilities::OSUtils::pathExists(path_.c_str())) { + return true; + } +#if _ELPP_OS_UNIX + const char* pathDelim_ = "/"; +#elif _ELPP_OS_WINDOWS + char pathDelim_[] = "\\"; +#endif // _ELPP_OS_UNIX + int status = -1; + + char* currPath_ = const_cast(path_.c_str()); + std::string buildingPath_ = std::string(); +#if _ELPP_OS_UNIX + if (path_[0] == '/') { + buildingPath_ = "/"; + } + currPath_ = STRTOK(currPath_, pathDelim_, 0); +#elif _ELPP_OS_WINDOWS + // Use secure functions API + char* nextTok_; + currPath_ = STRTOK(currPath_, pathDelim_, &nextTok_); +#endif // _ELPP_OS_UNIX + while (currPath_ != NULL) { + buildingPath_.append(currPath_); + buildingPath_.append(pathDelim_); +#if _ELPP_OS_UNIX + status = mkdir(buildingPath_.c_str(), _LOG_PERMS); + currPath_ = STRTOK(NULL, pathDelim_, 0); +#elif _ELPP_OS_WINDOWS + status = _mkdir(buildingPath_.c_str()); + currPath_ = STRTOK(NULL, pathDelim_, &nextTok_); +#endif // _ELPP_OS_UNIX + } + if (status == -1) { + return false; + } + return true; + } + + static std::string 
getPathFromFilename(const std::string& fullPath_, internal::Constants* constants_) { + if (fullPath_ == "" || fullPath_.find(constants_->PATH_SLASH) == std::string::npos) { + return fullPath_; + } + std::size_t lastSlashAt = fullPath_.find_last_of(constants_->PATH_SLASH); + if (lastSlashAt == 0) { + return constants_->PATH_SLASH; + } + return fullPath_.substr(0, lastSlashAt + 1); + } +}; // class OSUtils + +//! +//! Contains static functions related to log manipulation used internally. You should not use it. +//! +class LogManipulator : private internal::StaticClass { +public: + // Updates the formatSpecifier_ for currentFormat_ to value_ provided + static void updateFormatValue(const std::string& formatSpecifier_, + const std::string& value_, std::string& currentFormat_, + internal::Constants* constants_) { + std::size_t foundAt = std::string::npos; + while ((foundAt = currentFormat_.find(formatSpecifier_, foundAt + 1)) != std::string::npos){ + if (currentFormat_[foundAt > 0 ? foundAt - 1 : 0] == constants_->FORMAT_SPECIFIER_ESCAPE_CHAR) { + currentFormat_.erase(foundAt > 0 ? foundAt - 1 : 0, 1); + ++foundAt; + } else { + currentFormat_ = currentFormat_.replace(foundAt, formatSpecifier_.size(), value_); + return; + } + } + } +}; // class LogManipulator + +//! +//! Contains utility functions related to date/time used internally. You should not use it. +//! 
+class DateUtils : private internal::StaticClass { +public: +#if _ELPP_OS_WINDOWS + static void gettimeofday(struct timeval *tv) { + if (tv != NULL) { +# if defined(_MSC_EXTENSIONS) + const unsigned __int64 delta_ = 11644473600000000Ui64; +# else + const unsigned __int64 delta_ = 11644473600000000ULL; +# endif // defined(_MSC_EXTENSIONS) + const double secOffSet = 0.000001; + const unsigned long usecOffSet = 1000000; + FILETIME fileTime_; + GetSystemTimeAsFileTime(&fileTime_); + unsigned __int64 present_ = 0; + present_ |= fileTime_.dwHighDateTime; + present_ = present_ << 32; + present_ |= fileTime_.dwLowDateTime; + present_ /= 10; // mic-sec + // Subtract the difference + present_ -= delta_; + tv->tv_sec = static_cast(present_ * secOffSet); + tv->tv_usec = static_cast(present_ % usecOffSet); + } + } +#endif // _ELPP_OS_WINDOWS + + // Gets current date and time with milliseconds. + static std::string getDateTime(const std::string& bufferFormat_, unsigned int type_, internal::Constants* constants_, std::size_t milliSecondOffset_ = 1000) { + long milliSeconds = 0; + const int kDateBuffSize_ = 30; + char dateBuffer_[kDateBuffSize_] = ""; + char dateBufferOut_[kDateBuffSize_] = ""; +#if _ELPP_OS_UNIX + bool hasTime_ = ((type_ & constants_->kDateTime) || (type_ & constants_->kTimeOnly)); + timeval currTime; + gettimeofday(&currTime, NULL); + if (hasTime_) { + milliSeconds = currTime.tv_usec / milliSecondOffset_ ; + } + struct tm * timeInfo = localtime(&currTime.tv_sec); + strftime(dateBuffer_, sizeof(dateBuffer_), bufferFormat_.c_str(), timeInfo); + if (hasTime_) { + SPRINTF(dateBufferOut_, "%s.%03ld", dateBuffer_, milliSeconds); + } else { + SPRINTF(dateBufferOut_, "%s", dateBuffer_); + } +#elif _ELPP_OS_WINDOWS + const char* kTimeFormatLocal_ = "HH':'mm':'ss"; + const char* kDateFormatLocal_ = "dd/MM/yyyy"; + if ((type_ & constants_->kDateTime) || (type_ & constants_->kDateOnly)) { + if (GetDateFormatA(LOCALE_USER_DEFAULT, 0, 0, kDateFormatLocal_, dateBuffer_, 
kDateBuffSize_) != 0) { + SPRINTF(dateBufferOut_, "%s", dateBuffer_); + } + } + if ((type_ & constants_->kDateTime) || (type_ & constants_->kTimeOnly)) { + if (GetTimeFormatA(LOCALE_USER_DEFAULT, 0, 0, kTimeFormatLocal_, dateBuffer_, kDateBuffSize_) != 0) { + milliSeconds = static_cast(GetTickCount()) % milliSecondOffset_; + if (type_ & constants_->kDateTime) { + SPRINTF(dateBufferOut_, "%s %s.%03ld", dateBufferOut_, dateBuffer_, milliSeconds); + } else { + SPRINTF(dateBufferOut_, "%s.%03ld", dateBuffer_, milliSeconds); + } + } + } +#endif // _ELPP_OS_UNIX + return std::string(dateBufferOut_); + } + + static std::string formatMilliSeconds(double milliSeconds_) { + double result = milliSeconds_; + std::string unit = "ms"; + std::stringstream stream_; + if (result > 1000.0f) { + result /= 1000; unit = "seconds"; + if (result > 60.0f) { + result /= 60; unit = "minutes"; + if (result > 60.0f) { + result /= 60; unit = "hours"; + if (result > 24.0f) { + result /= 24; unit = "days"; + } + } + } + } + stream_ << result << " " << unit; + return stream_.str(); + } + + static inline double getTimeDifference(const timeval& endTime_, const timeval& startTime_) { + return static_cast((((endTime_.tv_sec - startTime_.tv_sec) * 1000000) + (endTime_.tv_usec - startTime_.tv_usec)) / 1000); + } +}; // class DateUtils +} // namespace utilities + +//! +//! Internal repository base to manage memory on heap. Used internally, you should not use it. +//! 
+template +class Registry { +public: + Registry(void) { + } + + virtual ~Registry(void) { + unregisterAll(); + } + + Registry(const Registry& other_) { + if (this != &other_) { + unregisterAll(); + for (std::size_t i = 0; i < other_.list_.size(); ++i) { + Class* curr_ = other_.list_.at(i); + if (curr_) { + list_.push_back(new Class(*curr_)); + } + } + } + } + + Registry& operator=(const Registry& other_) { + if (this == &other_) { + return *this; + } + unregisterAll(); + for (std::size_t i = 0; i < other_.list_.size(); ++i) { + Class* curr_ = other_.list_.at(i); + if (curr_) { + list_.push_back(new Class(*curr_)); + } + } + return *this; + } + + inline void registerNew(Class* c_) { + list_.push_back(c_); + } + + bool operator!=(const Registry& other_) { + if (list_.size() != other_.list_.size()) { + return true; + } + for (std::size_t i = 0; i < list_.size(); ++i) { + if (list_.at(i) != other_.list_.at(i)) { + return true; + } + } + return false; + } + + bool operator==(const Registry& other_) { + if (list_.size() != other_.list_.size()) { + return false; + } + for (std::size_t i = 0; i < list_.size(); ++i) { + if (list_.at(i) != other_.list_.at(i)) { + return false; + } + } + return true; + } + + template + Class* get(const T& t_) { + Iterator iter = std::find_if(list_.begin(), list_.end(), Predicate(t_)); + if (iter != list_.end() && *iter != NULL) { + return *iter; + } + return NULL; + } + + template + Class* get(const T& t_, const T2& t2_) { + Iterator iter = std::find_if(list_.begin(), list_.end(), Predicate(t_, t2_)); + if (iter != list_.end() && *iter != NULL) { + return *iter; + } + return NULL; + } + + template + inline bool exist(const T& t_) { + return (get(t_) != NULL); + } + + inline std::size_t count(void) const { + return list_.size(); + } + + inline bool empty(void) const { + return list_.empty(); + } + + Class* at(std::size_t i) const { + return list_.at(i); + } + +protected: + typedef typename std::vector::iterator Iterator; + + inline void 
unregisterAll(void) { + if (!empty()) { + std::for_each(list_.begin(), list_.end(), std::bind1st(std::mem_fun(&Registry::release), this)); + list_.clear(); + } + } + + inline void unregister(Class*& c_) { + if (c_) { + Iterator iter = list_.begin(); + for (; iter != list_.end(); ++iter) { + if (c_ == *iter) { + break; + } + } + if (iter != list_.end() && *iter != NULL) { + list_.erase(iter); + internal::utilities::safeDelete(c_); + } + } + } + + inline std::vector& list(void) { + return list_; + } +private: + std::vector list_; + + inline void release(Class* c_) { + internal::utilities::safeDelete(c_); + } +}; // class Registry + +//! +//! Scoped pointer used internally. You should not use it. +//! +template +class ScopedPointer { +public: + explicit ScopedPointer(T* ptr_ = 0) : + ptr_(ptr_), referenceCounter_(0) { + referenceCounter_ = new ReferenceCounter(); + referenceCounter_->increment(); + } + + ScopedPointer(const ScopedPointer& scopedPointer_) : + ptr_(scopedPointer_.ptr_), referenceCounter_(scopedPointer_.referenceCounter_) { + referenceCounter_->increment(); + } + + ScopedPointer& operator=(const ScopedPointer& other_) { + if (this != &other_) + { + validate(); + ptr_ = other_.ptr_; + referenceCounter_ = other_.referenceCounter_; + referenceCounter_->increment(); + } + return *this; + } + + virtual ~ScopedPointer(void) { + validate(); + } + + T& operator*(void) { + return *ptr_; + } + + T* operator->(void) { + return ptr_; + } + + T* pointer(void) { + return ptr_; + } + + class ReferenceCounter { + public: + ReferenceCounter(void) : count_(0) { + } + + ReferenceCounter& operator=(const ReferenceCounter& other_) { + if (this != &other_) { + count_ = other_.count_; + } + return *this; + } + + void increment(void) { + ++count_; + } + + int decrement(void) { + return this == NULL ? 
0 : --count_; + } + + private: + int count_; + }; +private: + T* ptr_; + ReferenceCounter* referenceCounter_; + + void validate(void) { + if(referenceCounter_->decrement() == 0) { + internal::utilities::safeDelete(ptr_, false); + internal::utilities::safeDelete(referenceCounter_, false); + } + } +}; + +//! +//! Class that represents single configuration. +//! +//! Single configuration has a level (easyloggingpp::Level), type (easyloggingpp::ConfigurationType) +//! and std::string based value. This value is later parsed into more appropriate data type depending on +//! type +//! +class Configuration { +public: + //! + //! Full constructor used to set initial value of configuration + //! \param level_ + //! \param type_ + //! \param value_ + //! + Configuration(unsigned int level_, unsigned int type_, const std::string& value_) : + level_(level_), + type_(type_), + value_(value_) { + } + + //! + //! \return Level of current configuration + //! + unsigned int level(void) const { + return level_; + } + + //! + //! \return Configuration type of current configuration + //! + unsigned int type(void) const { + return type_; + } + + //! + //! \return String based configuration value + //! + std::string value(void) const { + return value_; + } + + //! + //! Set string based configuration value + //! \param value_ Value to set. Values have to be std::string; For boolean values use "true", "false", for any integral values + //! use them in quotes. They will be parsed when configuring + //! + void setValue(const std::string& value_) { + this->value_ = value_; + } + + //! + //! Predicate used to find configuration from configuration repository. This is used internally. + //! 
+ class Predicate { + public: + Predicate(unsigned int level_, unsigned int type_) : + level_(level_), + type_(type_) { + } + + bool operator()(const Configuration* conf_) { + return ((conf_ != NULL) && (conf_->level() == level_) && (conf_->type() == type_)); + } + + private: + unsigned int level_; + unsigned int type_; + }; +private: + unsigned int level_; + unsigned int type_; + std::string value_; +}; + +} // namespace internal + +//! +//! Configuration repository that represents configuration for single logger +//! +class Configurations : public internal::Registry { +public: + //! + //! Default constructor + //! + Configurations(void) : + isFromFile_(false) { + } + + //! + //! Constructor used to set configurations via configuration file + //! \param configurationFile_ Full path to configuration file + //! \param base_ Configurations to base new configuration repository off. This value is used when you want to use + //! existing Configurations to base all the values and then set rest of configuration via configuration file. + //! + Configurations(const std::string& configurationFile_, Configurations* base_ = NULL) : + configurationFile_(configurationFile_), + isFromFile_(false) { + parseFromFile(configurationFile_, base_); + } + + //! + //! Set configurations based on other configurations + //! \param base_ Pointer to existing configurations. + //! + inline void setFromBase(Configurations* base_) { + if (base_ == NULL || base_ == this) return; + std::for_each(base_->list().begin(), base_->list().end(), std::bind1st(std::mem_fun(&Configurations::set), this)); + } + + //! + //! Checks to see whether specified configuration type exist in this repository + //! \param configurationType_ Configuration type to check against. Use easyloggingpp::ConfigurationType to prevent confusions + //! \return True if exist, false otherwise + //! 
+ inline bool contains(unsigned int configurationType_) { + ELPP_FOR_EACH_CONFIGURATION(i, ConfigurationType::kMinValid, + if (get(i, configurationType_) != NULL) { + return true; + } + ); + return false; + } + + //! + //! Sets configuration for specified level_ and configurationType_. If configuration already exists for specified + //! level and configuration type, value just gets updated. + //! Remember, it is not recommended to set skip_ELPPALL_Check to false unless you know exactly what you are doing + //! \param level_ Level to set configuration for. Use easyloggingpp::Level to prevent confusion + //! \param configurationType_ Configuration type to set configuration against. Use easyloggingpp::ConfigurationType to prevent confusion + //! \param value_ String based configuration value + //! \param skipLEVEL_ALL Determines whether to skip 'easyloggingpp::Level::All'. This is skipped by default because setting + //! 'All' may override configuration. Be careful with this. + //! + void set(unsigned int level_, unsigned int configurationType_, const std::string& value_, bool skipLEVEL_ALL = false) { + if (value_ == "") return; // ignore empty values + if ((configurationType_ == ConfigurationType::PerformanceTracking && level_ != Level::All) || + (configurationType_ == ConfigurationType::MillisecondsWidth && level_ != Level::All)) { + // configurationType_ not applicable for this level_ + return; + } + internal::Configuration* conf_ = get(level_, configurationType_); + if (conf_ == NULL) { + registerNew(new internal::Configuration(level_, configurationType_, value_)); + } else { + // Configuration already there, just update the value! + conf_->setValue(value_); + } + if (!skipLEVEL_ALL && level_ == Level::All) { + setAll(configurationType_, value_, true); + } + } + + //! + //! Parse configuration from file. + //! \param configurationFile_ + //! \param base_Configurations to base new configuration repository off. This value is used when you want to use + //! 
existing Configurations to base all the values and then set rest of configuration via configuration file. + //! \return True if successfully parsed, false otherwise. You may define '_STOP_ON_FIRST_ELPP_ASSERTION' to make sure you + //! do not proceed without successful parse. + //! + bool parseFromFile(const std::string& configurationFile_, Configurations* base_ = NULL) { + setFromBase(base_); + std::ifstream fileStream_(configurationFile_.c_str(), std::ifstream::in); + __EASYLOGGINGPP_ASSERT(fileStream_.is_open(), "Unable to open configuration file [" << configurationFile_ << "] for parsing."); + bool parsedSuccessfully_ = false; + std::string line = std::string(); + unsigned int currLevel = 0; + while (fileStream_.good()) { + std::getline(fileStream_, line); + parsedSuccessfully_ = Parser::parseLine(line, currLevel, this); + __EASYLOGGINGPP_ASSERT(parsedSuccessfully_, "Unable to parse configuration line: " << line); + } + isFromFile_ = true; + return parsedSuccessfully_; + } + + //! + //! Parse configurations from configuration string. This configuration string has same syntax as configuration file contents. Make + //! sure all the necessary new line characters are provided. + //! \param configurationsString + //! \return True if successfully parsed, false otherwise. You may define '_STOP_ON_FIRST_ELPP_ASSERTION' to make sure you + //! do not proceed without successful parse. + //! + bool parseFromText(const std::string& configurationsString) { + bool parsedSuccessfully_ = false; + std::string line = std::string(); + unsigned int currLevel = 0; + std::vector lines; + internal::utilities::StringUtils::split(configurationsString, '\n', lines); + for (std::size_t i = 0; i < lines.size(); ++i) { + line = lines.at(i); + parsedSuccessfully_ = Parser::parseLine(line, currLevel, this); + __EASYLOGGINGPP_ASSERT(parsedSuccessfully_, "Unable to parse configuration line: " << line); + } + isFromFile_ = false; + return parsedSuccessfully_; + } + + //! + //! 
Sets configurations to default configurations set by easylogging++. + //! NOTE: This has nothing to do with Loggers::setDefaultConfigurations - thats completely different thing. This is + //! library's own default format. + //! + void setToDefault(void) { + setAll(ConfigurationType::Enabled, "true"); +#if _ELPP_OS_UNIX +# if _ELPP_NDK + setAll(ConfigurationType::Filename, "/data/local/tmp/myeasylog.txt"); +# else + setAll(ConfigurationType::Filename, "/tmp/logs/myeasylog.log"); +# endif // _ELPP_NDK +#elif _ELPP_OS_WINDOWS + setAll(ConfigurationType::Filename, "logs\\myeasylog.log"); +#endif // _ELPP_OS_UNIX + setAll(ConfigurationType::ToFile, "true"); + setAll(ConfigurationType::ToStandardOutput, "true"); + setAll(ConfigurationType::MillisecondsWidth, "3"); + setAll(ConfigurationType::PerformanceTracking, "false"); + setAll(easyloggingpp::ConfigurationType::RollOutSize, "0"); + setAll(ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Debug, ConfigurationType::Format, "%datetime %level [%logger] [%user@%host] [%func] [%loc] %log"); + // INFO and WARNING are set to default by Level::ALL + set(Level::Error, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Fatal, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Verbose, ConfigurationType::Format, "%datetime %level-%vlevel [%logger] %log"); + set(Level::QA, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Trace, ConfigurationType::Format, "%datetime %level [%logger] [%func] [%loc] %log"); + } + + //! + //! Sets configuration for all levels. + //! Remember, it is not recommended to set skip_ELPPALL_Check to false unless you know exactly what you are doing + //! \param configurationType_ + //! \param value_ + //! \param skipLEVEL_ALL Determines whether to skip 'easyloggingpp::Level::All'. This is skipped by default because setting + //! 'All' may override configuration. Be careful with this. + //! 
+ inline void setAll(unsigned int configurationType_, const std::string& value_, bool skipLEVEL_ALL = false) { + if (!skipLEVEL_ALL) { + set(Level::All, configurationType_, value_); + } + ELPP_FOR_EACH_LEVEL(i, Level::Debug, + set(i, configurationType_, value_); + ); + } + + //! + //! Clears the repository. + //! All the configurations are maintained on heap for faster access so if you are sure you will not use this + //! repository and you have configured all the loggers against this or you have used this configuration for all the + //! purposes you need it for, you may retain memory by using this method. If you do not do this, internal memory management + //! does it itself at the end of application execution. + //! + inline void clear(void) { + unregisterAll(); + } + + //! + //! \return Returns configuration file used in parsing this configurations. If this repository was set manually or by text + //! this returns empty string. + //! + std::string configurationFile(void) const { + return configurationFile_; + } + + //! + //! Parser used internally to parse configurations from file or text. You should not need this unless you are working on + //! some tool for EasyLogging++ + //! 
+ class Parser : private internal::StaticClass { + public: + static void ignoreComments(std::string& line) { + std::size_t foundAt = 0; + std::size_t quotesStart = line.find("\""); + std::size_t quotesEnd = std::string::npos; + if (quotesStart != std::string::npos) { + quotesEnd = line.find("\"", quotesStart + 1); + } + if ((foundAt = line.find("//")) != std::string::npos) { + if (foundAt < quotesEnd) { + foundAt = line.find("//", quotesEnd + 1); + } + line = line.substr(0, foundAt); + } + } + + static inline bool isLevel(const std::string& line) { + return internal::utilities::StringUtils::startsWith(line, "*"); + } + + static inline bool isConfig(const std::string& line) { + std::size_t assignment = line.find('='); + return line != "" && + (line[0] >= 65 || line[0] <= 90 || line[0] >= 97 || line[0] <= 122) && + (assignment != std::string::npos) && + (line.size() > assignment); + } + + static inline bool isComment(const std::string& line) { + return internal::utilities::StringUtils::startsWith(line, "//"); + } + + static bool parseLine(std::string& line, unsigned int& currLevel, Configurations* conf) { + std::string currLevelStr = std::string(); + unsigned int currConfig = 0; + std::string currConfigStr = std::string(); + std::string currValue = std::string(); + line = internal::utilities::StringUtils::trim(line); + if (isComment(line)) return true; + ignoreComments(line); + if (line == "") { + // Comment ignored + return true; + } + if (isLevel(line)) { + currLevelStr = internal::utilities::StringUtils::stripAllWhiteSpaces(line); + if (currLevelStr.size() <= 2) { + return true; + } + currLevelStr = currLevelStr.substr(1, currLevelStr.size() - 2); + internal::utilities::StringUtils::tolower(currLevelStr); + currLevel = Level::convertFromString(currLevelStr); + return true; + } + if (isConfig(line)) { + std::size_t assignment = line.find('='); + currConfigStr = line.substr(0, assignment); + currConfigStr = 
internal::utilities::StringUtils::stripAllWhiteSpaces(currConfigStr); + internal::utilities::StringUtils::tolower(currConfigStr); + currConfig = ConfigurationType::convertFromString(currConfigStr); + currValue = line.substr(assignment + 1); + currValue = internal::utilities::StringUtils::trim(currValue); + std::size_t quotesStart = currValue.find("\"", 0); + std::size_t quotesEnd = std::string::npos; + if (quotesStart != std::string::npos) { + quotesEnd = currValue.find("\"", quotesStart + 1); + } + if (quotesStart != std::string::npos && quotesEnd != std::string::npos) { + // Quote provided - check and strip if valid + __EASYLOGGINGPP_ASSERT((quotesStart < quotesEnd), "Configuration error - No ending quote found in [" << currConfigStr << "]"); + __EASYLOGGINGPP_ASSERT((quotesStart + 1 != quotesEnd), "Empty configuration value for [" << currConfigStr << "]"); + if ((quotesStart != quotesEnd) && (quotesStart + 1 != quotesEnd)) { + // Explicit check in case if assertion is disabled + currValue = currValue.substr(quotesStart + 1, quotesEnd - 1); + } + } + } + __EASYLOGGINGPP_ASSERT(currLevel != Level::Unknown, "Unrecognized severity level [" << currLevelStr << "]"); + __EASYLOGGINGPP_ASSERT(currConfig != ConfigurationType::Unknown, "Unrecognized configuration [" << currConfigStr << "]"); + if (currLevel == Level::Unknown || currConfig == ConfigurationType::Unknown) { + return false; // unrecognizable level or config + } + conf->set(currLevel, currConfig, currValue); + return true; + } + }; // class Parser +private: + std::string configurationFile_; + bool isFromFile_; + internal::threading::Mutex mutex_; + + inline void set(internal::Configuration* conf_) { + if (conf_ == NULL) return; + set(conf_->level(), conf_->type(), conf_->value()); + } +}; // class Configurations + +class Loggers; // fwd declaration + +namespace internal { + +class RegisteredLoggers; // fwd declaration +class Writer; // fwd declaration + +//! +//! 
Configuration map used internally for faster access of configuration while executing. +//! +template +class ConfigurationMap { +public: + typedef typename std::pair Entry; + + ConfigurationMap(void) { + table = new Entry*[Level::kMaxValid + 1]; + for (unsigned int i = 0; i < (Level::kMaxValid + 1); ++i) { + table[i] = NULL; + } + count = 0; + } + + const T& get(unsigned int level_, bool forceGetLevel = false) { + if (forceGetLevel || table[level_] != NULL) { + if (table[level_] == NULL) { + return default_; + } + return table[level_]->second; + } else if (table[Level::All] != NULL) { + return table[Level::All]->second; + } + return default_; + } + + void set(unsigned int level_, const T& value) { + // Unset any existing value for this level + unset(level_); + table[level_] = new Entry(level_, value); + ++count; + } + + void unset(unsigned int level_) { + if (table[level_] != NULL) { + internal::utilities::safeDelete(table[level_]); + if (count > 0) + --count; + } + } + + inline bool exist(unsigned int level_) const { + return table[level_] != NULL; + } + + inline bool exist(unsigned int level_, const T& value) { + return get(level_, true) == value; + } + + void clear(void) { + for (unsigned int i = 0; i < (Level::kMaxValid + 1); ++i) { + internal::utilities::safeDelete(table[i]); + } + delete[] table; + count = 0; + } + + virtual ~ConfigurationMap(void) { + clear(); + } + + inline void setDefault(const T& default_) { + this->default_ = default_; + } + + inline std::size_t size(void) const { + return count; + } +private: + Entry** table; + std::size_t count; + T default_; +}; + +//! +//! Configurations used internally that defines data type of each configuration from easyloggingpp::ConfigurationType +//! 
+class TypedConfigurations { +public: + TypedConfigurations(const Configurations& configurations, internal::Constants* constants_) { + this->constants_ = constants_; + this->configurations_ = configurations; + enabledMap_.setDefault(false); + toFileMap_.setDefault(false); + toStandardOutputMap_.setDefault(false); + filenameMap_.setDefault(""); + logFormatMap_.setDefault(""); + dateFormatMap_.setDefault(""); + dateFormatSpecifierMap_.setDefault(""); + millisecondsWidthMap_.setDefault(3); + performanceTrackingMap_.setDefault(false); + fileStreamMap_.setDefault(NULL); + formatFlagMap_.setDefault(0x0); + rollOutSizeMap_.setDefault(0); + parse(configurations); + } + + virtual ~TypedConfigurations(void) { + deleteFileStreams(); + } + + const Configurations& configurations(void) const { + return configurations_; + } +private: + internal::ConfigurationMap enabledMap_; + internal::ConfigurationMap toFileMap_; + internal::ConfigurationMap filenameMap_; + internal::ConfigurationMap toStandardOutputMap_; + internal::ConfigurationMap logFormatMap_; + internal::ConfigurationMap dateFormatMap_; + internal::ConfigurationMap dateFormatSpecifierMap_; + internal::ConfigurationMap millisecondsWidthMap_; + internal::ConfigurationMap performanceTrackingMap_; + internal::ConfigurationMap fileStreamMap_; + internal::ConfigurationMap formatFlagMap_; + internal::ConfigurationMap rollOutSizeMap_; + internal::Constants* constants_; + Configurations configurations_; + + friend class Writer; + friend class easyloggingpp::Loggers; + + inline bool enabled(unsigned int level_) { + return enabledMap_.get(level_); + } + + inline bool toFile(unsigned int level_) { + return toFileMap_.get(level_); + } + + inline const std::string& filename(unsigned int level_) { + return filenameMap_.get(level_); + } + + inline bool toStandardOutput(unsigned int level_) { + return toStandardOutputMap_.get(level_); + } + + inline const std::string& logFormat(unsigned int level_) { + return logFormatMap_.get(level_); + 
} + + inline const std::string& dateFormat(unsigned int level_) { + return dateFormatMap_.get(level_); + } + + inline const std::string& dateFormatSpecifier(unsigned int level_) { + return dateFormatSpecifierMap_.get(level_); + } + + inline int millisecondsWidth(unsigned int level_ = Level::All) { + return millisecondsWidthMap_.get(level_); + } + + inline bool performanceTracking(unsigned int level_ = Level::All) { + return performanceTrackingMap_.get(level_); + } + + inline std::fstream* fileStream(unsigned int level_) { + return fileStreamMap_.get(level_); + } + + inline std::size_t rollOutSize(unsigned int level_) { + return rollOutSizeMap_.get(level_); + } + + inline int formatFlag(unsigned int level_) { + return formatFlagMap_.get(level_); + } + + void parse(const Configurations& configurations_) { + for (std::size_t i = 0; i < configurations_.count(); ++i) { + Configuration* conf = configurations_.at(i); + switch (conf->type()) { + case ConfigurationType::Enabled: + setValue(conf->level(), getBool(conf->value()), enabledMap_); + break; + case ConfigurationType::ToFile: + setValue(conf->level(), getBool(conf->value()), toFileMap_); + break; + case ConfigurationType::Filename: + insertFilename(conf->level(), conf->value()); + break; + case ConfigurationType::ToStandardOutput: + setValue(conf->level(), getBool(conf->value()), toStandardOutputMap_); + break; + case ConfigurationType::Format: + determineFormats(conf->level(), conf->value()); + break; + case ConfigurationType::MillisecondsWidth: + if (conf->level() == Level::All) { + int origVal = getInt(conf->value()); + int msl_; +#if _ELPP_OS_UNIX + switch (origVal) { + case 3: + msl_ = 1000; + break; + case 4: + msl_ = 100; + break; + case 5: + msl_ = 10; + break; + case 6: + msl_ = 1; + break; + default: + msl_ = constants_->DEFAULT_MILLISECOND_OFFSET; + } +#elif _ELPP_OS_WINDOWS + msl_ = 1000; + __EASYLOGGINGPP_SUPPRESS_UNSED(origVal); +#endif // _ELPP_OS_UNIX + setValue(conf->level(), msl_, 
millisecondsWidthMap_); + } + break; + case ConfigurationType::PerformanceTracking: + if (conf->level() == Level::All) { + setValue(conf->level(), getBool(conf->value()), performanceTrackingMap_); + } + break; + case ConfigurationType::RollOutSize: + setValue(conf->level(), static_cast(getULong(conf->value())), rollOutSizeMap_); + unsigned int validLevel_ = 0; + std::string rolloutFilename_ = std::string(); + checkRollOuts(conf->level(), validLevel_, rolloutFilename_); + break; + } + } + } + + void determineFormats(unsigned int level_, const std::string& originalFormat) { + unsigned int formatSpec = 0x0; + if (originalFormat.find(constants_->APP_NAME_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kAppName; + } + if (originalFormat.find(constants_->LOGGER_ID_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLoggerId; + } + if (originalFormat.find(constants_->THREAD_ID_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kThreadId; + } + if (originalFormat.find(constants_->LOCATION_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLocation; + } + if (originalFormat.find(constants_->FUNCTION_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kFunction; + } + if (originalFormat.find(constants_->USER_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kUser; + } + if (originalFormat.find(constants_->HOST_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kHost; + } + if (originalFormat.find(constants_->LOG_MESSAGE_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLogMessage; + } + if (originalFormat.find(constants_->VERBOSE_LEVEL_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kVerboseLevel; + } + if (originalFormat.find(constants_->DATE_TIME_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kDateTime; + setValue(level_, constants_->DATE_TIME_FORMAT_SPECIFIER, 
dateFormatSpecifierMap_); + } else if (originalFormat.find(constants_->DATE_ONLY_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kDateOnly; + setValue(level_, constants_->DATE_ONLY_FORMAT_SPECIFIER, dateFormatSpecifierMap_); + } else if (originalFormat.find(constants_->TIME_ONLY_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kTimeOnly; + setValue(level_, constants_->TIME_ONLY_FORMAT_SPECIFIER, dateFormatSpecifierMap_); + } +#if _ELPP_OS_UNIX + const std::string kTimeFormatLocal_ = "%H:%M:%S"; + const std::string kDateFormatLocal_ = "%d/%m/%Y"; + std::string dateFormat; + + if (formatSpec & constants_->kDateOnly) { + dateFormat = kDateFormatLocal_; + } else if (formatSpec & constants_->kTimeOnly) { + dateFormat = kTimeFormatLocal_; + } else { + std::stringstream ss; + ss << kDateFormatLocal_ << " " << kTimeFormatLocal_; + dateFormat = ss.str(); + } + setValue(level_, dateFormat, dateFormatMap_); +#endif // _ELPP_OS_UNIX + setValue(level_, formatSpec, formatFlagMap_); + // Update %level + std::string origFormatCopy = originalFormat; + switch (level_) { + case Level::Debug: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_DEBUG_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Info: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_INFO_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Warning: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_WARNING_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Error: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_ERROR_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Fatal: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + 
constants_->LOG_FATAL_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Verbose: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_VERBOSE_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::QA: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_QA_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Trace: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_TRACE_LEVEL_VALUE, origFormatCopy, constants_); + break; + } + setValue(level_, origFormatCopy + "\n", logFormatMap_); + } + + void deleteFileStreams(void) { + ELPP_FOR_EACH_LEVEL(i, Level::kMinValid, + removeFile(i); + ); + } + + // This is different since we need unique values + void insertFilename(unsigned int level_, const std::string& fname_, bool forceNew = false) { + std::string fnameFull = fname_; + if (internal::utilities::StringUtils::endsWith(fnameFull, constants_->PATH_SLASH)) { + fnameFull.append(constants_->DEFAULT_LOG_FILENAME); + } + std::string path_ = internal::utilities::OSUtils::getPathFromFilename(fnameFull, constants_); + if (path_.size() < fnameFull.size()) { + // Contains path - create it if it does not already exist + internal::utilities::OSUtils::createPath(path_); + } + if (filenameMap_.size() == 0) { + filenameMap_.set(Level::All, fnameFull); + std::fstream *fsAll = newFileStream(fnameFull, forceNew); + if (fsAll != NULL) { + fileStreamMap_.set(Level::All, fsAll); + } + return; + } + ELPP_FOR_EACH_LEVEL(i, Level::kMinValid, + if (filenameMap_.exist(i, fnameFull)) { + return; + } + ); + filenameMap_.set(level_, fnameFull); + // Just before we proceed and create new file stream we check for existing one on same level, + // if we have existing one, we first delete it to prevent memory leak. 
+ std::fstream *fs = fileStreamMap_.get(level_, true); + internal::utilities::safeDelete(fs); + fileStreamMap_.unset(level_); + fs = newFileStream(fnameFull, forceNew); + if (fs != NULL) { + fileStreamMap_.set(level_, fs); + } + } + + template + void setValue(unsigned int level_, const T& value_, internal::ConfigurationMap& map_, bool skipLEVEL_ALL = false) { + if (map_.size() == 0 && !skipLEVEL_ALL) { + map_.set(Level::All, value_); + return; + } + if (map_.exist(static_cast(Level::All), value_)) { + return; + } + map_.set(level_, value_); + } + + std::fstream* newFileStream(const std::string& filename, bool forceNew = false) { + std::fstream *fs = NULL; + if (forceNew) { + fs = new std::fstream(filename.c_str(), std::fstream::out); + } else { + fs = new std::fstream(filename.c_str(), std::fstream::out | std::fstream::app); + } + if (fs->is_open()) { + fs->flush(); + } else { + internal::utilities::safeDelete(fs, false); + std::cerr << "Bad file [" << filename << "]" << std::endl; + return NULL; + } + return fs; + } + + void removeFile(unsigned int level_) { + std::fstream* fs = fileStream(level_); + if (!fs) { + return; + } + if (fs->is_open()) { + fs->close(); + } + internal::utilities::safeDelete(fs, false); + fileStreamMap_.unset(level_); + filenameMap_.unset(level_); + } + + unsigned long getULong(const std::string& confValue_) { + bool valid = true; + std::string trimmedVal = internal::utilities::StringUtils::trim(confValue_); + if (trimmedVal.size() == 0) { + valid = false; + __EASYLOGGINGPP_SUPPRESS_UNSED(valid); + __EASYLOGGINGPP_ASSERT(valid, "Configuration value not a valid integer " << trimmedVal); + } + for (std::size_t i = 0; i < trimmedVal.size(); ++i) { + if (trimmedVal[i] < 48 || trimmedVal[i] > 57) { + valid = false; + break; + } + } + __EASYLOGGINGPP_SUPPRESS_UNSED(valid); + __EASYLOGGINGPP_ASSERT(valid, "Configuration value not a valid integer " << trimmedVal); + return atol(confValue_.c_str()); + } + + inline int getInt(const std::string& 
confValue_) { + return static_cast(getULong(confValue_)); + } + + inline bool getBool(const std::string& confValue_) { + std::string trimmedVal = internal::utilities::StringUtils::trim(confValue_); + return (trimmedVal == "1" || trimmedVal == "true" || trimmedVal == "TRUE"); + } + + std::size_t getSizeOfFile(std::fstream *fs) { + if (!fs) { + return 0; + } + std::streampos currPos = fs->tellg(); + fs->seekg (0, fs->end); + std::size_t size = static_cast(fs->tellg()); + fs->seekg (currPos); + return size; + } + + bool checkRollOuts(unsigned int level_, unsigned int& validLevel_, std::string& fname_) { + std::fstream* fs = fileStream(level_); + std::size_t rollOutSize_ = rollOutSize(level_); + if (rollOutSize_ != 0 && getSizeOfFile(fs) >= rollOutSize_) { + fname_ = filename(level_); +#if defined(_ELPP_INTERNAL_INFO) + std::cout << "Cleaning log file [" << fname_ << "]\n"; +#endif // defined(_ELPP_INTERNAL_INFO) + // Find and reset correct level. By correct level we mean the current + // available level in fileStream because this level_ could actually be using + // configurations from Level::All and you do not want to create a brand new + // stream just because we are rolling log away + validLevel_ = findValidLevel(fileStreamMap_, level_); + forceReinitiateFile(validLevel_, fname_); + return true; + } + return false; + } + + template + inline unsigned int findValidLevel(internal::ConfigurationMap& map_, unsigned int refLevel_) { + return map_.exist(refLevel_) ? refLevel_ : static_cast(Level::All); + } + + inline void forceReinitiateFile(unsigned int level_, const std::string& filename_) { + removeFile(level_); + insertFilename(level_, filename_, true); + } +}; +} // namespace internal + +//! +//! Represents single logger used to write log. +//! +class Logger { +public: + //! + //! Minimal constructor to set logger ID and constants. You should not use this constructor manually, instead use + //! easyloggingpp::Loggers::getLogger + //! 
\param uniqueIdentifier_ Logger ID that you will require to get logger from logger repository + //! \param constants_ Use easyloggingpp::internal::registeredLoggers->constants() + //! + Logger(const std::string& uniqueIdentifier_, internal::Constants* constants_) : + id_(uniqueIdentifier_), + constants_(constants_), + typedConfigurations_(NULL), + stream_(new std::stringstream()) { + Configurations defaultConfs; + defaultConfs.setToDefault(); + configure(defaultConfs); + userConfigurations_ = defaultConfs; + defaultConfs.clear(); + } + + //! + //! Full constructor to set logger ID, constants and configuration. + //! \param uniqueIdentifier_ Logger ID that you will require to get logger from logger repository + //! \param constants_ Use easyloggingpp::internal::registeredLoggers->constants() + //! \param configurations Configurations to set logger against + //! + Logger(const std::string& uniqueIdentifier_, internal::Constants* constants_, const Configurations& configurations) : + id_(uniqueIdentifier_), + constants_(constants_), + typedConfigurations_(NULL), + stream_(new std::stringstream()) { + configure(configurations); + } + + virtual ~Logger(void) { + internal::utilities::safeDelete(typedConfigurations_); + internal::utilities::safeDelete(stream_); + } + + //! + //! \return Logger ID + //! + inline std::string id(void) const { + return id_; + } + + //! + //! Configures logger against specified configurations + //! \param configurations_ + //! 
+ void configure(const Configurations& configurations_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + __EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + // Configuring uses existing configuration as starting point + // and then sets configurations_ as base to prevent losing any + // previous configurations + Configurations base_ = userConfigurations_; + if (userConfigurations_ != configurations_) { + userConfigurations_ = configurations_; + base_.setFromBase(const_cast(&configurations_)); + } + internal::utilities::safeDelete(typedConfigurations_); + typedConfigurations_ = new internal::TypedConfigurations(base_, constants_); + configured_ = true; + } + + //! + //! Reconfigures logger + //! + inline void reconfigure(void) { + configure(this->userConfigurations_); + } + + //! + //! \return Application name for this logger + //! + inline std::string applicationName(void) const { + return applicationName_; + } + + + //! + //! Application name can vary from logger to logger. For example for a library application name may be different. + //! This is whats used later when you use '%app' in log format + //! + inline void setApplicationName(const std::string& applicationName_) { + this->applicationName_ = applicationName_; + } + + //! + //! \return Configurations that this logger is set against + //! + inline Configurations& configurations(void) { + return userConfigurations_; + } + + //! + //! \return Whether or not logger is configured. + //! + inline bool configured(void) const { + return configured_; + } + + //! + //! Predicate used in logger repository to find logger. This is used internally. You should not use it. + //! 
+ class Predicate { + public: + explicit Predicate(const std::string& id_) : + id_(id_) { + } + inline bool operator()(const Logger* logger_) { + return ((logger_ != NULL) && (logger_->id() == id_)); + } + private: + std::string id_; + }; +private: + std::string id_; + internal::Constants* constants_; + Configurations userConfigurations_; + internal::TypedConfigurations* typedConfigurations_; + std::stringstream* stream_; + std::string applicationName_; + bool configured_; + internal::threading::Mutex mutex_; + friend class internal::Writer; + friend class Loggers; + friend class internal::RegisteredLoggers; + + Logger(void); + + std::stringstream* stream(void) { + return stream_; + } + + inline void acquireLock(void) { + mutex_.lock(); + } + + inline void releaseLock(void) { + mutex_.unlock(); + } +}; + +namespace internal { +//! +//! Internal log counter used for interval logging +//! +class LogCounter : private internal::NoCopy { +public: + explicit LogCounter(internal::Constants* constants_) : + file_(""), + line_(0), + position_(1), + constants_(constants_) { + } + + LogCounter(const char* file_, + unsigned long int line_, + internal::Constants* constants_) : + file_(file_), + line_(line_), + position_(1), + constants_(constants_) { + } + + virtual ~LogCounter(void) { + } + + inline void resetLocation(const char* file_, + unsigned long int line_) { + this->file_ = file_; + this->line_ = line_; + } + + inline void reset(std::size_t n_) { + if (position_ >= constants_->MAX_LOG_PER_COUNTER) { + position_ = (n_ >= 1 ? 
constants_->MAX_LOG_PER_COUNTER % n_ : 0); + } + ++position_; + } + + inline const char* file(void) const { + return file_; + } + + inline unsigned long int line(void) const { + return line_; + } + + inline std::size_t position(void) const { + return position_; + } + + class Predicate { + public: + Predicate(const char* file_, unsigned long int line_) + : file_(file_), + line_(line_) { + } + inline bool operator()(const LogCounter* counter_) { + return ((counter_ != NULL) && + (counter_->file_ == file_) && + (counter_->line_ == line_)); + } + + private: + const char* file_; + unsigned long int line_; + }; +private: + const char* file_; + unsigned long int line_; + std::size_t position_; + internal::Constants* constants_; +}; // class LogCounter + +//! +//! Internal LogCounter repository +//! +class RegisteredCounters : public Registry { +public: + bool validate(const char* file_, unsigned long int line_, std::size_t n_, internal::Constants* constants_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + __EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + bool result_ = false; + internal::LogCounter* counter_ = get(file_, line_); + if (counter_ == NULL) { + registerNew(counter_ = new internal::LogCounter(file_, line_, constants_)); + } + if (n_ >= 1 && counter_->position() != 0 && counter_->position() % n_ == 0) { + result_ = true; + } + counter_->reset(n_); + return result_; + } +private: + internal::threading::Mutex mutex_; +}; // class RegisteredCounters + +//! +//! Internal logger repository. You should not access functionalities directly, you should use easyloggingpp::Loggers instead +//! 
+class RegisteredLoggers : public internal::Registry { +public: + RegisteredLoggers(void) : + constants_(new internal::Constants()), + username_(internal::utilities::OSUtils::currentUser()), + hostname_(internal::utilities::OSUtils::currentHost()), + counters_(new internal::RegisteredCounters()) { + defaultConfigurations_.setToDefault(); + Configurations conf; + conf.setToDefault(); + conf.parseFromText(constants_->DEFAULT_LOGGER_CONFIGURATION); + registerNew(new Logger("trivial", constants_, conf)); + registerNew(new Logger("business", constants_)); + registerNew(new Logger("security", constants_)); + Configurations confPerformance; + confPerformance.setToDefault(); + confPerformance.setAll(ConfigurationType::PerformanceTracking, "true"); + registerNew(new Logger("performance", constants_, confPerformance)); + } + + virtual ~RegisteredLoggers(void) { + internal::utilities::safeDelete(constants_); + internal::utilities::safeDelete(counters_); + } + + inline internal::Constants* constants(void) const { + return constants_; + } + + inline RegisteredCounters* counters(void) { + return counters_; + } + + inline bool validateCounter(const char* file_, unsigned long int line_, std::size_t n_) { + return counters_->validate(file_, line_, n_, constants_); + } +private: + internal::Constants* constants_; + std::string username_; + std::string hostname_; + internal::threading::Mutex mutex_; + internal::RegisteredCounters* counters_; + Configurations defaultConfigurations_; + + friend class Writer; + friend class easyloggingpp::Loggers; + + inline const std::string& username(void) { + return username_; + } + + inline const std::string& hostname(void) { + return hostname_; + } + + inline void setDefaultConfigurations(const Configurations& configurations) { + defaultConfigurations_.setFromBase(const_cast(&configurations)); + } + + Logger* get(const std::string& id_, bool forceCreation_ = true) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + 
__EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + Logger* logger_ = internal::Registry::get(id_); + if (logger_ == NULL && forceCreation_) { + logger_ = new Logger(id_, constants_, defaultConfigurations_); + registerNew(logger_); + } + return logger_; + } + + inline void unregister(Logger*& logger_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); +#endif // _ELPP_ENABLE_MUTEX + internal::Registry::unregister(logger_); + } + + inline void acquireLock(void) { + mutex_.lock(); + } + + inline void releaseLock(void) { + mutex_.unlock(); + } + + void setApplicationArguments(int argc, char** argv) { + while (argc-- > 0) { + // Look for --v=X argument + if ((strlen(argv[argc]) >= 5) && (argv[argc][0] == '-') && (argv[argc][1] == '-') && + (argv[argc][2] == 'v') && (argv[argc][3] == '=') && (isdigit(argv[argc][4]))) { + // Current argument is --v=X + // where X is a digit between 0-9 + constants_->CURRENT_VERBOSE_LEVEL = atoi(argv[argc] + 4); + } + // Look for -v argument + else if ((strlen(argv[argc]) == 2) && (argv[argc][0] == '-') && (argv[argc][1] == 'v')) { + constants_->CURRENT_VERBOSE_LEVEL = constants_->MAX_VERBOSE_LEVEL; + } + // Look for --verbose argument + else if ((strlen(argv[argc]) == 9) && (argv[argc][0] == '-') && (argv[argc][1] == '-') && + (argv[argc][2] == 'v') && (argv[argc][3] == 'e') && (argv[argc][4] == 'r') && + (argv[argc][5] == 'b') && (argv[argc][6] == 'o') && (argv[argc][7] == 's') && + (argv[argc][8] == 'e')) { + constants_->CURRENT_VERBOSE_LEVEL = constants_->MAX_VERBOSE_LEVEL; + } + } + } + + inline void setApplicationArguments(int argc, const char** argv) { + setApplicationArguments(argc, const_cast(argv)); + } +}; + +extern internal::ScopedPointer registeredLoggers; +#if defined(_ELPP_STL_LOGGING) +namespace workarounds { +// There is workaround needed to loop through some stl containers. 
In order to do that, we need iterable containers +// of same type and provide iterator interface and pass it on to writeIterator(). +// Remember, this is passed by value in constructor so that we dont change original containers. +// This operation is as expensive as O(class_.size()) or O(constants->MAX_LOG_PER_COUNTER) which ever is smaller. + +// +// Abstract IterableContainer template that provides interface for iterable classes of type T +// +template +class IterableContainer { +public: + typedef typename Container::iterator iterator; + typedef typename Container::const_iterator const_iterator; + IterableContainer(void){} + virtual ~IterableContainer(void) {} + iterator begin(void) { return getContainer().begin(); } + iterator end(void) { return getContainer().end(); } + const_iterator begin(void) const { return getContainer().begin(); } + const_iterator end(void) const { return getContainer().end(); } +private: + virtual Container& getContainer(void) = 0; +}; + +// +// Implements IterableContainer and provides iterable std::priority_queue class +// +template, typename Comparator = std::less > +class IterablePriorityQueue : public IterableContainer, public std::priority_queue { +public: + IterablePriorityQueue(std::priority_queue queue_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !queue_.empty()) { + this->push(queue_.top()); + queue_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; + +// +// Implements IterableContainer and provides iterable std::queue class +// +template > +class IterableQueue : public IterableContainer, public std::queue { +public: + IterableQueue(std::queue queue_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !queue_.empty()) { + this->push(queue_.front()); + queue_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; + +// +// Implements 
IterableContainer and provides iterable std::stack class +// +template > +class IterableStack : public IterableContainer, public std::stack { +public: + IterableStack(std::stack stack_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !stack_.empty()) { + this->push(stack_.top()); + stack_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; +} // namespace workarounds +#endif //defined(_ELPP_STL_LOGGING) + +#define _ELPP_STREAM(l) (*(l->stream())) + +class NullWriter : private internal::NoCopy { +public: + NullWriter(void) {} + + template + inline NullWriter& operator<<(const T&) { + return *this; + } +}; + +class Writer : private internal::NoCopy { +public: + Writer(const std::string& loggerId_, + unsigned int aspect_, + unsigned int severity_, + const char* func_, + const char* file_, + const unsigned long int line_, + bool condition_ = true, + int verboseLevel_ = 0, + int counter_ = 0) : + aspect_(aspect_), + severity_(severity_), + func_(func_), + file_(file_), + line_(line_), + condition_(condition_), + verboseLevel_(verboseLevel_), + counter_(counter_), + proceed_(true) { + constants_ = registeredLoggers->constants(); + logger_ = registeredLoggers->get(loggerId_, false); + if (logger_ == NULL) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Logger [" << loggerId_ << "] not registered or configured yet!"); + proceed_ = false; + } +#if _ELPP_ENABLE_MUTEX + registeredLoggers->acquireLock(); + mutex_.lock(); +#endif // _ELPP_ENABLE_MUTEX + + if (proceed_) { + proceed_ = logger_->typedConfigurations_->enabled(severity_); + } + if (proceed_) { +#if (defined(_ELPP_STRICT_ROLLOUT)) + checkRollOuts(severity_, logger_); +#endif // (defined(_ELPP_STRICT_ROLLOUT)) + } + if (proceed_ && (severity_ == Level::Verbose)) { + proceed_ = (verboseLevel_ <= constants_->CURRENT_VERBOSE_LEVEL); + } + if (proceed_ && (aspect_ == Aspect::Conditional)) { + proceed_ = condition_; + } + } + + 
virtual ~Writer(void) { + if (proceed_) { + buildAndWriteLine(); + } +#if _ELPP_ENABLE_MUTEX + registeredLoggers->releaseLock(); + mutex_.unlock(); +#endif // _ELPP_ENABLE_MUTEX + } + + inline Writer& operator<<(const std::string& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(char log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(bool log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed short log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned short log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed int log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned int log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed long log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned long log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(float log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(double log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(char* log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const char* log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const void* log_) { + if (!proceed_) 
{ return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(long double log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const std::wstring& log_) { + if (!proceed_) { return *this; } + return operator<<(log_.c_str()); + } + inline Writer& operator<<(const wchar_t* log_) { + if (!proceed_) { return *this; } + if (log_ == NULL) { + _ELPP_STREAM(logger_) << constants_->NULL_POINTER; + return *this; + } + std::size_t len_ = wcslen(log_) + 1; + char* buff_ = (char*)malloc(len_ + 1); +# if _ELPP_OS_UNIX || (_ELPP_OS_WINDOWS && !_ELPP_CRT_DBG_WARNINGS) + std::wcstombs(buff_, log_, len_); +# elif _ELPP_OS_WINDOWS + std::size_t convCount_ = 0; + mbstate_t mbState_; + ::memset((void*)&mbState_, 0, sizeof(mbState_)); + wcsrtombs_s(&convCount_, buff_, len_, &log_, len_, &mbState_); +# endif // _ELPP_OS_UNIX + _ELPP_STREAM(logger_) << buff_; + free(buff_); + return *this; + } +#if defined(_ELPP_STL_LOGGING) + template + inline Writer& operator<<(const std::vector& vec_) { + if (!proceed_) { return *this; } + return writeIterator(vec_.begin(), vec_.end(), vec_.size()); + } + template + inline Writer& operator<<(const std::list& list_) { + if (!proceed_) { return *this; } + return writeIterator(list_.begin(), list_.end(), list_.size()); + } + template + inline Writer& operator<<(const std::deque& deque_) { + if (!proceed_) { return *this; } + return writeIterator(deque_.begin(), deque_.end(), deque_.size()); + } + template + inline Writer& operator<<(const std::queue& queue_) { + if (!proceed_) { return *this; } + internal::workarounds::IterableQueue iterableQueue_ = + static_cast >(queue_); + return writeIterator(iterableQueue_.begin(), iterableQueue_.end(), iterableQueue_.size()); + } + template + inline Writer& operator<<(const std::stack& stack_) { + if (!proceed_) { return *this; } + internal::workarounds::IterableStack iterableStack_ = + static_cast >(stack_); 
+ return writeIterator(iterableStack_.begin(), iterableStack_.end(), iterableStack_.size()); + } + template + inline Writer& operator<<(const std::priority_queue& priorityQueue_) { + if (!proceed_) { return *this; } + internal::workarounds::IterablePriorityQueue iterablePriorityQueue_ = + static_cast >(priorityQueue_); + return writeIterator(iterablePriorityQueue_.begin(), iterablePriorityQueue_.end(), iterablePriorityQueue_.size()); + } + template + inline Writer& operator<<(const std::set& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const std::multiset& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const std::pair& pair_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(pair_.first)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(pair_.second)); + _ELPP_STREAM(logger_) << ")"; + return *this; + } + template + inline Writer& operator<<(const std::bitset& bitset_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + _ELPP_STREAM(logger_) << bitset_.to_string(); + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const std::map& map_) { + if (!proceed_) { return *this; } + return writeIterator(map_.begin(), map_.end(), map_.size()); + } + template + inline Writer& operator<<(const std::multimap& map_) { + if (!proceed_) { return *this; } + return writeIterator(map_.begin(), map_.end(), map_.size()); + } +#endif // defined(_ELPP_STL_LOGGING) +#if defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) + inline Writer& operator<<(const QString& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.toStdString(); + return *this; + } + inline Writer& operator<<(const QStringRef& log_) { + if (!proceed_) { return *this; } + return 
operator<<(log_.toString()); + } + inline Writer& operator<<(qint64 log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << QString::number(log_).toStdString(); + return *this; + } + inline Writer& operator<<(quint64 log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << QString::number(log_).toStdString(); + return *this; + } + inline Writer& operator<<(QChar log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.toLatin1(); + return *this; + } +# if (!_ELPP_QT_5) + inline Writer& operator<<(QBool log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << (bool(log_ != 0) ? "true" : "false"); + return *this; + } +# endif // (!_ELPP_QT_5) + inline Writer& operator<<(const QLatin1String& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.latin1(); + return *this; + } + template + inline Writer& operator<<(const QList& list_) { + if (!proceed_) { return *this; } + return writeIterator(list_.begin(), list_.end(), list_.size()); + } + template + inline Writer& operator<<(const QVector& vec_) { + if (!proceed_) { return *this; } + return writeIterator(vec_.begin(), vec_.end(), vec_.size()); + } + template + inline Writer& operator<<(const QQueue& queue_) { + if (!proceed_) { return *this; } + return writeIterator(queue_.begin(), queue_.end(), queue_.size()); + } + template + inline Writer& operator<<(const QSet& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const QPair& pair_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(pair_.first)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(pair_.second)); + _ELPP_STREAM(logger_) << ")"; + return *this; + } + template + inline Writer& operator<<(const QMap& map_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + QList keys = map_.keys(); + typename 
QList::const_iterator begin = keys.begin(); + typename QList::const_iterator end = keys.end(); + int max_ = static_cast(constants_->MAX_LOG_PER_CONTAINER); // to prevent warning + for (int index_ = 0; begin != end && index_ < max_; ++index_, ++begin) { + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(*begin)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(map_.value(*begin))); + _ELPP_STREAM(logger_) << ")"; + _ELPP_STREAM(logger_) << ((index_ < keys.size() -1) ? ", " : ""); + } + if (begin != end) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const QMultiMap& map_) { + if (!proceed_) { return *this; } + operator << (static_cast >(map_)); + return *this; + } + template + inline Writer& operator<<(const QHash& hash_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + QList keys = hash_.keys(); + typename QList::const_iterator begin = keys.begin(); + typename QList::const_iterator end = keys.end(); + int max_ = static_cast(constants_->MAX_LOG_PER_CONTAINER); // prevent type warning + for (int index_ = 0; begin != end && index_ < max_; ++index_, ++begin) { + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(*begin)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(hash_.value(*begin))); + _ELPP_STREAM(logger_) << ")"; + _ELPP_STREAM(logger_) << ((index_ < keys.size() -1) ? 
", " : ""); + } + if (begin != end) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const QMultiHash& multiHash_) { + if (!proceed_) { return *this; } + operator << (static_cast >(multiHash_)); + return *this; + } + template + inline Writer& operator<<(const QLinkedList& linkedList_) { + if (!proceed_) { return *this; } + return writeIterator(linkedList_.begin(), linkedList_.end(), linkedList_.size()); + } + template + inline Writer& operator<<(const QStack& stack_) { + if (!proceed_) { return *this; } + return writeIterator(stack_.begin(), stack_.end(), stack_.size()); + } +#endif // defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) + template + inline Writer& operator<<(const Class& class_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << class_; + return *this; + } +private: + unsigned int aspect_; + unsigned int severity_; + const char* func_; + const char* file_; + const unsigned long int line_; + bool condition_; + int verboseLevel_; + int counter_; + Logger* logger_; + std::stringstream tempss_; + std::string currLine_; + bool proceed_; + internal::Constants* constants_; + internal::threading::Mutex mutex_; + + friend class Logger; + + template + inline Writer& writeIterator(Iterator begin_, Iterator end_, std::size_t size_) { + _ELPP_STREAM(logger_) << "["; + for (std::size_t i = 0; begin_ != end_ && i < constants_->MAX_LOG_PER_CONTAINER; ++i, ++begin_) { + operator << (*begin_); + _ELPP_STREAM(logger_) << ((i < size_ - 1) ? 
", " : ""); + } + if (begin_ != end_) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + + void buildAndWriteLine(void) { + internal::RegisteredLoggers* rl_ = registeredLoggers.pointer(); + TypedConfigurations* conf_ = logger_->typedConfigurations_; + unsigned int f_ = conf_->formatFlag(severity_); // format spec + currLine_ = conf_->logFormat(severity_); + std::string dateFormat = conf_->dateFormat(severity_); + std::string fs_; // format specifier + std::string v_; // value + // App name + if (f_ & constants_->kAppName) { + v_ = logger_->applicationName(); + fs_ = constants_->APP_NAME_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Logger ID + if (f_ & constants_->kLoggerId) { + v_ = logger_->id(); + fs_ = constants_->LOGGER_ID_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Thread ID + if (f_ & constants_->kThreadId) { + std::stringstream ss; + ss << threading::getCurrentThreadId(); + fs_ = constants_->THREAD_ID_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, ss.str(), currLine_, constants_); + } + // Date/Time + if ((f_ & constants_->kDateOnly) || (f_ & constants_->kTimeOnly) || (f_ & constants_->kDateTime)) { + v_ = internal::utilities::DateUtils::getDateTime(dateFormat, + f_, constants_, conf_->millisecondsWidth(Level::All)); + fs_ = conf_->dateFormatSpecifier(severity_); + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Function + if (f_ & constants_->kFunction) { + v_ = std::string(func_); + fs_ = constants_->FUNCTION_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Location + if (f_ & constants_->kLocation) { + tempss_ << file_ << ":" << line_; + fs_ = constants_->LOCATION_FORMAT_SPECIFIER; + 
internal::utilities::LogManipulator::updateFormatValue(fs_, tempss_.str(), currLine_, constants_); + tempss_.str(""); + } + // User + if (f_ & constants_->kUser) { + v_ = rl_->username(); + fs_ = constants_->USER_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Host + if (f_ & constants_->kHost) { + v_ = rl_->hostname(); + fs_ = constants_->HOST_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Verbose level + if ((severity_ == Level::Verbose) && (f_ & constants_->kVerboseLevel)) { + tempss_ << verboseLevel_; + fs_ = constants_->VERBOSE_LEVEL_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, tempss_.str(), currLine_, constants_); + } + // Log message + if (f_ & constants_->kLogMessage) { + fs_ = constants_->LOG_MESSAGE_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, logger_->stream()->str(), currLine_, constants_); + } + log(); + } + +#if (defined(_ELPP_STRICT_ROLLOUT)) + bool checkRollOuts(unsigned int level_, Logger* baseLogger_) { + unsigned int validLevel_ = 0; + std::string rolledOutFile = std::string(); + if (baseLogger_->typedConfigurations_->checkRollOuts(level_, validLevel_, rolledOutFile)) { + Logger* currLogger_ = NULL; + for (unsigned int i = 0; i < registeredLoggers->count(); ++i) { + currLogger_ = registeredLoggers->list().at(i); + if (currLogger_ == baseLogger_) + continue; + std::string fname = currLogger_->typedConfigurations_->filename(validLevel_); + if (fname == rolledOutFile) { + currLogger_->typedConfigurations_->forceReinitiateFile(validLevel_, fname); + } + } + return true; + } + return false; + } +#endif // (defined(_ELPP_STRICT_ROLLOUT)) + + inline void syncWritePointer(unsigned int level_, Logger* targetLogger_, std::fstream* baseStream_) { + targetLogger_->acquireLock(); + 
targetLogger_->typedConfigurations_->fileStream(level_)->seekg(baseStream_->tellg()); + targetLogger_->releaseLock(); + } + + void safeWriteToFile(unsigned int level_, Logger* logger_, const std::string& line) { + std::string baseFilename_ = logger_->typedConfigurations_->filename(level_); + std::fstream* fstr = logger_->typedConfigurations_->fileStream(level_); + (*fstr) << line; + fstr->flush(); + Logger* currLogger_ = NULL; + for (std::size_t i = 0; i < registeredLoggers->count(); ++i) { + currLogger_ = registeredLoggers->list().at(i); + if (currLogger_ == logger_) + continue; + std::string fname = currLogger_->typedConfigurations_->filename(level_); + if (fname == baseFilename_) { + syncWritePointer(level_, currLogger_, fstr); + } + } + } + + void log(void) { + if (logger_->stream_) { + if (logger_->typedConfigurations_->toFile(severity_)) { + safeWriteToFile(severity_, logger_, currLine_); + } + if (logger_->typedConfigurations_->toStandardOutput(severity_)) { + std::cout << currLine_; + } + logger_->stream_->str(""); + } + } +}; +} // namespace internal + +class VersionInfo : private internal::StaticClass { +public: + // Minimal formatted displayable information + static inline const std::string formattedInfo(void) { + std::stringstream ss; + ss << "EasyLogging++ v" << version() << " (" << releaseDate() << ")"; + ss << std::endl; + ss << website(); + ss << std::endl; + ss << copyright(); + return ss.str(); + } + + // Current version number + static inline const std::string version(void) { return std::string("8.91"); } + + // Release date of current version + static inline const std::string releaseDate(void) { return std::string("12-07-2013 1243hrs"); } + + // Original author and maintainer + static inline const std::string author(void) { return std::string("Majid Khan "); } + + // Web link + static inline const std::string website(void) { return std::string("http://icplusplus.com/tools/easylogging"); } + + // Link to source code + static inline const 
std::string sourceCodeLink(void) { return std::string("https://github.com/mkhan3189/EasyLoggingPP"); } + + // Copyright information + static inline const std::string copyright(void) { return std::string("Copyright (c) 2012 - 2013 Majid Khan"); } + + // Full licence + static const std::string licence(void) { + std::stringstream ss; + ss << " This software is provided 'as-is', without any express or implied" << std::endl; + ss << " warranty. In no event will the authors be held liable for any damages" << std::endl; + ss << " arising from the use of this software." << std::endl; + ss << std::endl; + ss << " Permission is granted to anyone to use this software for any purpose," << std::endl; + ss << " including commercial applications, and to alter it and redistribute" << std::endl; + ss << " it freely, subject to the following restrictions:" << std::endl; + ss << std::endl; + ss << " 1. The origin of this software must not be misrepresented; you must" << std::endl; + ss << " not claim that you wrote the original software. If you use this" << std::endl; + ss << " software in a product, an acknowledgment in the product documentation" << std::endl; + ss << " would be appreciated but is not required." << std::endl; + ss << std::endl; + ss << " 2. Altered source versions must be plainly marked as such, and must" << std::endl; + ss << " not be misrepresented as being the original software." << std::endl; + ss << std::endl; + ss << " 3. This notice may not be removed or altered from any source" << std::endl; + ss << " distribution"; + return ss.str(); + } +}; // class VersionInfo + +//! +//! \brief Helper class to manage loggers and configurations +//! +//! A static helper class for users of library. This class contains functions related to register +//! and configure logger/s +//! +class Loggers : private internal::StaticClass { +public: + + //! + //! Get existing logger, if logger does not exist a newly created logger is returned + //! 
\param identifier_ A unique ID for logger + //! \return Pointer to easyloggingpp::Logger from logger repository + //! + static inline Logger* getLogger(const std::string& identifier_) { + return internal::registeredLoggers->get(identifier_); + } + + //! + //! Reconfigures logger with easyloggingpp::Configurations + //! \param logger_ Pointer to Logger to configure. You get use getLogger() to get pointer from logger repository + //! \param configurations_ easyloggingpp::Configurations to configure logger against + //! \return Updated pointer to Logger + //! + static inline Logger* reconfigureLogger(Logger* logger_, const Configurations& configurations_) { + if (!logger_) return NULL; + logger_->configure(configurations_); + return logger_; + } + + //! + //! Reconfigures logger with easyloggingpp::Configurations + //! \param identifier_ Logger ID + //! \param configurations_ easyloggingpp::Configurations to configure logger against + //! \return Updated pointer to Logger + //! + static inline Logger* reconfigureLogger(const std::string& identifier_, Configurations& configurations_) { + Logger* logger_ = Loggers::getLogger(identifier_); + Loggers::reconfigureLogger(logger_, configurations_); + return logger_; + } + + //! + //! Reconfigures all loggers available in logger repository + //! \param configurations_ easyloggingpp::Configurations to configure logger against + //! + static inline void reconfigureAllLoggers(Configurations& configurations_) { + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + Logger* l = internal::registeredLoggers->at(i); + Loggers::reconfigureLogger(l, configurations_); + } + } + + //! + //! Reconfigures all loggers for single configuration. + //! \param configurationType_ Configuration type to update. Use easyloggingpp::ConfigurationType to prevent confusion + //! \param value_ Value to set. Values have to be std::string; For boolean values use "true", "false", for any integral values + //! use them in quotes. 
They will be parsed when configuring + //! + static inline void reconfigureAllLoggers(unsigned int configurationType_, const std::string& value_) { + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + Logger* l = internal::registeredLoggers->at(i); + l->configurations().setAll(configurationType_, value_); + l->reconfigure(); + } + } + + //! + //! Sets default configurations. This configuration is used for future loggers. + //! \param configurations + //! \param configureExistingLoggers If true, all loggers are updated against provided configuration otherwise only future loggers + //! will be updated and all the existing loggers will use configurations that have been set previously. + //! + static inline void setDefaultConfigurations(Configurations& configurations, bool configureExistingLoggers = false) { + internal::registeredLoggers->setDefaultConfigurations(configurations); + if (configureExistingLoggers) { + Loggers::reconfigureAllLoggers(configurations); + } + } + + //! + //! Sets application arguments and uses them where needed. Example use is when application is run with '--v=X' or '-v', verbose logging + //! turns on + //! \param argc Argument count + //! \param argv Argument value array pointer + //! + static inline void setApplicationArguments(int argc, char** argv) { + internal::registeredLoggers->setApplicationArguments(argc, argv); + } + + //! + //! Sets application arguments and uses them where needed. Example use is when application is run with '--v=X' or '-v', verbose logging + //! turns on + //! \param argc + //! \param argv + //! + static inline void setApplicationArguments(int argc, const char** argv) { + internal::registeredLoggers->setApplicationArguments(argc, argv); + } + + //! + //! Disables all loggers + //! + static inline void disableAll(void) { + reconfigureAllLoggers(ConfigurationType::Enabled, "false"); + } + + //! + //! Enable all loggers + //! 
+ static inline void enableAll(void) { + reconfigureAllLoggers(ConfigurationType::Enabled, "true"); + } + + //! + //! Reconfigure all loggers to write to single log file + //! \param logFilename_ Full path to log file + //! + static inline void setFilename(const std::string& logFilename_) { + reconfigureAllLoggers(ConfigurationType::Filename, logFilename_); + } + + //! + //! Reconfigure specified logger to write to specified log file + //! \param logger_ Pointer to logger. You may use Loggers::get(id) to get pointer + //! \param logFilename_ Full path to log file + //! + static inline void setFilename(Logger* logger_, const std::string& logFilename_) { + if (!logger_) return; + logger_->configurations().setAll(ConfigurationType::Filename, logFilename_); + logger_->reconfigure(); + } + + //! + //! Determines whether or not performance tracking is enabled + //! \return True if enabled, false otherwise + //! + static inline bool performanceTrackingEnabled(void) { + return performanceLogger()->typedConfigurations_->performanceTracking(); + } + + //! + //! Disables performance tracking. + //! Performance tracking is logged using 'performance' logger. + //! + static inline void disablePerformanceTracking(void) { + Logger* l = Loggers::performanceLogger(); + l->configurations().setAll(ConfigurationType::PerformanceTracking, "false"); + l->reconfigure(); + } + + //! + //! Enable performance tracking + //! Performance tracking is logged using 'performance' logger. + //! + static inline void enablePerformanceTracking(void) { + Logger* l = Loggers::performanceLogger(); + l->configurations().setAll(ConfigurationType::PerformanceTracking, "true"); + l->reconfigure(); + } + + //! + //! Iterates through logger repository and puts IDs into listOfIds + //! \param listOfIds (Passed by reference) Vector to fill up + //! 
+ static inline void getAllLogIdentifiers(std::vector& listOfIds) { + listOfIds.clear(); + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + listOfIds.push_back(internal::registeredLoggers->at(i)->id()); + } + } + + //! + //! \return Returns one of default loggers 'trivial' logger + //! + static inline Logger* trivialLogger(void) { + return Loggers::getLogger("trivial"); + } + + //! + //! \return Returns one of default loggers 'business' logger + //! + static inline Logger* businessLogger(void) { + return Loggers::getLogger("business"); + } + + //! + //! \return Returns one of default loggers 'security' logger + //! + static inline Logger* securityLogger(void) { + return Loggers::getLogger("security"); + } + + //! + //! \return Returns one of default loggers 'performance' logger + //! + static inline Logger* performanceLogger(void) { + return Loggers::getLogger("performance"); + } + + //! + //! Static class that contains static helper functions used to read configurations + //! 
+ class ConfigurationsReader : private internal::StaticClass { + public: + static inline bool enabled(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->enabled(level_); + } + + static inline bool enabled(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->enabled(level_); + } + + static inline bool toFile(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->toFile(level_); + } + + static inline bool toFile(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->toFile(level_); + } + + static inline const std::string& filename(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->filename(level_); + } + + static inline const std::string& filename(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->filename(level_); + } + + static inline bool toStandardOutput(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->toStandardOutput(level_); + } + + static inline bool toStandardOutput(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->toStandardOutput(level_); + } + + static inline const std::string& logFormat(Logger* logger_, unsigned int 
level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->logFormat(level_); + } + + static inline const std::string& logFormat(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->logFormat(level_); + } + + static inline int millisecondsWidth(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->millisecondsWidth(level_); + } + + static inline int millisecondsWidth(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->millisecondsWidth(level_); + } + + static inline bool performanceTracking(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->performanceTracking(level_); + } + + static inline bool performanceTracking(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->performanceTracking(level_); + } + + static inline std::size_t logRollOutSize(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->rollOutSize(level_); + } + + static inline std::size_t logRollOutSize(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->rollOutSize(level_); + } + + private: + static inline internal::TypedConfigurations* constConf(Logger* logger_) { + return logger_->typedConfigurations_; + 
} + }; // class ConfigurationsReader +private: + internal::threading::Mutex mutex_; +}; +// +// Helping Macros +// +// Performance tracking macros +#if ((!defined(_DISABLE_PERFORMANCE_TRACKING)) || (!defined(_DISABLE_INFO_LOGS))) +# if _ELPP_OS_UNIX +# define _ELPP_GET_CURR_TIME(tm) gettimeofday(tm, NULL); +# elif _ELPP_OS_WINDOWS +# define _ELPP_GET_CURR_TIME(tm) easyloggingpp::internal::utilities::DateUtils::gettimeofday(tm); +# endif +# define START_FUNCTION_LOG "Executing [" << __func__ << "]" +# define TIME_OUTPUT "Executed [" << __func__ << "] in [" << \ + easyloggingpp::internal::utilities::DateUtils::formatMilliSeconds( \ + easyloggingpp::internal::utilities::DateUtils::getTimeDifference(functionEndTime, functionStartTime)) << "]" +# define FUNC_SUB_COMMON_START { timeval functionStartTime, functionEndTime; _ELPP_GET_CURR_TIME(&functionStartTime) +# define WRITE_FUNC_PERFORMANCE _ELPP_GET_CURR_TIME(&functionEndTime); \ + if (easyloggingpp::Loggers::performanceTrackingEnabled()) { PINFO << TIME_OUTPUT; } +# define FUNC_SUB_COMMON_END WRITE_FUNC_PERFORMANCE; +# define SUB(FUNCTION_NAME,PARAMS) void FUNCTION_NAME PARAMS FUNC_SUB_COMMON_START +# define END_SUB FUNC_SUB_COMMON_END } +# define FUNC(RETURNING_TYPE,FUNCTION_NAME,PARAMS) RETURNING_TYPE FUNCTION_NAME PARAMS FUNC_SUB_COMMON_START +# define RETURN(return_value) FUNC_SUB_COMMON_END return return_value; +# define END_FUNC(return_value) RETURN(return_value) } +# define MAIN(argc, argv) FUNC(int, main, (argc, argv)) +# define END_MAIN(return_value) FUNC_SUB_COMMON_END; return return_value; } +# define RETURN_MAIN(exit_status) return exit_status; +#else +# define SUB(FUNCTION_NAME,PARAMS) void FUNCTION_NAME PARAMS { +# define END_SUB } +# define FUNC(RETURNING_TYPE,FUNCTION_NAME,PARAMS) RETURNING_TYPE FUNCTION_NAME PARAMS { +# define END_FUNC(x) return x; } +# define RETURN(expr) return expr; +# define MAIN(argc, argv) FUNC(int, main, (argc, argv)) +# define END_MAIN(x) return x; } +# define 
RETURN_MAIN(exit_status) return exit_status; +#endif // ((!defined(_DISABLE_PERFORMANCE_TRACKING)) || (!defined(_DISABLE_INFO_LOGS))) + +#define _ELPP_LOG_WRITER(_logger, _level) easyloggingpp::internal::Writer(\ + _logger, easyloggingpp::internal::Aspect::Normal, _level, __func__, __FILE__, __LINE__) +#define _ELPP_LOG_WRITER_COND(_c, _logger, _level) if (_c) easyloggingpp::internal::Writer(\ + _logger, easyloggingpp::internal::Aspect::Conditional, _level, __func__, __FILE__, __LINE__, _c) +#define _ELPP_LOG_WRITER_N(_n, _logger, _level) if (easyloggingpp::internal::registeredLoggers->validateCounter(\ + __FILE__, __LINE__, _n)) easyloggingpp::internal::Writer(_logger, easyloggingpp::internal::Aspect::Interval,\ + _level, __func__, __FILE__, __LINE__, true, 0, _n) +#undef VLOG_IS_ON +#define VLOG_IS_ON(verboseLevel) verboseLevel <= easyloggingpp::internal::registeredLoggers->constants()->CURRENT_VERBOSE_LEVEL +// Undef levels to support LOG(LEVEL) +#undef INFO +#undef DEBUG +#undef ERROR +#undef FATAL +#undef QA +#undef TRACE +#undef VERBOSE +// +// Custom loggers - macro names with levels - requires loggerId +// +// Undef existing +#undef CINFO +#undef CWARNING +#undef CDEBUG +#undef CERROR +#undef CFATAL +#undef ERROR +#undef CQA +#undef CTRACE +#undef CVERBOSE +#undef CINFO_IF +#undef CWARNING_IF +#undef CDEBUG_IF +#undef CERROR_IF +#undef CFATAL_IF +#undef ERROR_IF +#undef CQA_IF +#undef CTRACE_IF +#undef CVERBOSE_IF +#undef CINFO_EVERY_N +#undef CWARNING_EVERY_N +#undef CDEBUG_EVERY_N +#undef CERROR_EVERY_N +#undef CFATAL_EVERY_N +#undef ERROR_EVERY_N +#undef CQA_EVERY_N +#undef CTRACE_EVERY_N +#undef CVERBOSE_EVERY_N +// Normal logs +#if _ELPP_INFO_LOG +# define CINFO(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Info) +#else +# define CINFO(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Warning) +#else +# define 
CWARNING(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define CDEBUG(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Error) +#else +# define CERROR(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::QA) +#else +# define CQA(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE(vlevel_, loggerId) easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Normal, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, true, vlevel_) +#else +# define CVERBOSE(vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// Conditional logs +#if _ELPP_INFO_LOG +# define CINFO_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Info) +#else +# define CINFO_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Warning) +#else +# define CWARNING_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define 
CDEBUG_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Error) +#else +# define CERROR_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::QA) +#else +# define CQA_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE_IF(condition_, vlevel_, loggerId) if (condition_) easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Conditional, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, condition_, vlevel_) +#else +# define CVERBOSE_IF(condition_, vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// Interval logs +#if _ELPP_INFO_LOG +# define CINFO_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Info) +#else +# define CINFO_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, 
easyloggingpp::Level::Warning) +#else +# define CWARNING_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define CDEBUG_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Error) +#else +# define CERROR_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::QA) +#else +# define CQA_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE_EVERY_N(interval_, vlevel_, loggerId) if (easyloggingpp::internal::registeredLoggers->validateCounter(__FILE__, __LINE__, interval_)) \ + easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Interval, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, true, vlevel_, interval_) +#else +# define CVERBOSE_EVERY_N(interval_, vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// +// Custom Loggers - Requires (level, loggerId) +// +// undef existing +#undef CLOG +#undef 
CLOG_VERBOSE +#undef CVLOG +#undef CLOG_IF +#undef CLOG_VERBOSE_IF +#undef CVLOG_IF +#undef CLOG_EVERY_N +#undef CLOG_VERBOSE_EVERY_N +#undef CVLOG_EVERY_N +// Normal logs +#define CLOG(LEVEL, loggerId) C##LEVEL(loggerId) +#define CLOG_VERBOSE(vlevel, loggerId) CVERBOSE(vlevel, loggerId) +#define CVLOG(vlevel, loggerId) CVERBOSE(vlevel, loggerId) +// Conditional logs +#define CLOG_IF(condition, LEVEL, loggerId) C##LEVEL##_IF(condition, loggerId) +#define CLOG_VERBOSE_IF(condition, vlevel, loggerId) CVERBOSE_IF(condition, vlevel, loggerId) +#define CVLOG_IF(condition, vlevel, loggerId) CVERBOSE_IF(condition, vlevel, loggerId) +// Interval logs +#define CLOG_EVERY_N(n, LEVEL, loggerId) C##LEVEL##_EVERY_N(n, loggerId) +#define CLOG_VERBOSE_EVERY_N(n, vlevel, loggerId) CVERBOSE_EVERY_N(n, vlevel, loggerId) +#define CVLOG_EVERY_N(n, vlevel, loggerId) CVERBOSE_EVERY_N(n, vlevel, loggerId) +// +// Default Loggers macro using CLOG(), CLOG_VERBOSE() and CVLOG() macros +// +// undef existing +#undef LOG +#undef LOG_VERBOSE +#undef VLOG +#undef LOG_IF +#undef LOG_VERBOSE_IF +#undef VLOG_IF +#undef LOG_EVERY_N +#undef LOG_VERBOSE_EVERY_N +#undef VLOG_EVERY_N +// Normal logs +#define LOG(LEVEL) CLOG(LEVEL, "trivial") +#define LOG_VERBOSE(vlevel) CLOG_VERBOSE(vlevel, "trivial") +#define VLOG(vlevel) CVLOG(vlevel, "trivial") +// Conditional logs +#define LOG_IF(condition, LEVEL) CLOG_IF(condition, LEVEL, "trivial") +#define LOG_VERBOSE_IF(condition, vlevel) CLOG_VERBOSE_IF(condition, vlevel, "trivial") +#define VLOG_IF(condition, vlevel) CVLOG_IF(condition, vlevel, "trivial") +// Interval logs +#define LOG_EVERY_N(n, LEVEL) CLOG_EVERY_N(n, LEVEL, "trivial") +#define LOG_VERBOSE_EVERY_N(n, vlevel) CLOG_VERBOSE_EVERY_N(n, vlevel, "trivial") +#define VLOG_EVERY_N(n, vlevel) CVLOG_EVERY_N(n, vlevel, "trivial") +// +// Default Loggers macro using C##LEVEL("trivial") +// +// undef existing +#undef LINFO +#undef LWARNING +#undef LDEBUG +#undef LERROR +#undef LFATAL +#undef LQA +#undef 
LTRACE +#undef LVERBOSE +#undef LINFO_IF +#undef LWARNING_IF +#undef LDEBUG_IF +#undef LERROR_IF +#undef LFATAL_IF +#undef LQA_IF +#undef LTRACE_IF +#undef LVERBOSE_IF +#undef LINFO_EVERY_N +#undef LWARNING_EVERY_N +#undef LDEBUG_EVERY_N +#undef LERROR_EVERY_N +#undef LFATAL_EVERY_N +#undef LQA_EVERY_N +#undef LTRACE_EVERY_N +#undef LVERBOSE_EVERY_N +// Normal logs +#define LINFO CINFO("trivial") +#define LWARNING CWARNING("trivial") +#define LDEBUG CDEBUG("trivial") +#define LERROR CERROR("trivial") +#define LFATAL CFATAL("trivial") +#define LQA CQA("trivial") +#define LTRACE CTRACE("trivial") +#define LVERBOSE(level) CVERBOSE(level, "trivial") +// Conditional logs +#define LINFO_IF(condition) CINFO_IF(condition, "trivial") +#define LWARNING_IF(condition) CWARNING_IF(condition, "trivial") +#define LDEBUG_IF(condition) CDEBUG_IF(condition, "trivial") +#define LERROR_IF(condition) CERROR_IF(condition, "trivial") +#define LFATAL_IF(condition) CFATAL_IF(condition, "trivial") +#define LQA_IF(condition) CQA_IF(condition, "trivial") +#define LTRACE_IF(condition) CTRACE_IF(condition, "trivial") +#define LVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "trivial") +// Interval logs +#define LINFO_EVERY_N(n) CINFO_EVERY_N(n, "trivial") +#define LWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "trivial") +#define LDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "trivial") +#define LERROR_EVERY_N(n) CERROR_EVERY_N(n, "trivial") +#define LFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "trivial") +#define LQA_EVERY_N(n) CQA_EVERY_N(n, "trivial") +#define LTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "trivial") +#define LVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "trivial") +// +// Default Loggers macro using C##LEVEL("business") +// +// undef existing +#undef BINFO +#undef BWARNING +#undef BDEBUG +#undef BERROR +#undef BFATAL +#undef BQA +#undef BTRACE +#undef BVERBOSE +#undef BINFO_IF +#undef BWARNING_IF +#undef BDEBUG_IF +#undef BERROR_IF +#undef BFATAL_IF +#undef BQA_IF +#undef BTRACE_IF +#undef 
BVERBOSE_IF +#undef BINFO_EVERY_N +#undef BWARNING_EVERY_N +#undef BDEBUG_EVERY_N +#undef BERROR_EVERY_N +#undef BFATAL_EVERY_N +#undef BQA_EVERY_N +#undef BTRACE_EVERY_N +#undef BVERBOSE_EVERY_N +// Normal logs +#define BINFO CINFO("business") +#define BWARNING CWARNING("business") +#define BDEBUG CDEBUG("business") +#define BERROR CERROR("business") +#define BFATAL CFATAL("business") +#define BQA CQA("business") +#define BTRACE CTRACE("business") +#define BVERBOSE(level) CVERBOSE(level, "business") +// Conditional logs +#define BINFO_IF(condition) CINFO_IF(condition, "business") +#define BWARNING_IF(condition) CWARNING_IF(condition, "business") +#define BDEBUG_IF(condition) CDEBUG_IF(condition, "business") +#define BERROR_IF(condition) CERROR_IF(condition, "business") +#define BFATAL_IF(condition) CFATAL_IF(condition, "business") +#define BQA_IF(condition) CQA_IF(condition, "business") +#define BTRACE_IF(condition) CTRACE_IF(condition, "business") +#define BVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "business") +// Interval logs +#define BINFO_EVERY_N(n) CINFO_EVERY_N(n, "business") +#define BWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "business") +#define BDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "business") +#define BERROR_EVERY_N(n) CERROR_EVERY_N(n, "business") +#define BFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "business") +#define BQA_EVERY_N(n) CQA_EVERY_N(n, "business") +#define BTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "business") +#define BVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "business") +// +// Default Loggers macro using C##LEVEL("security") +// +// undef existing +#undef SINFO +#undef SWARNING +#undef SDEBUG +#undef SERROR +#undef SFATAL +#undef SQA +#undef STRACE +#undef SVERBOSE +#undef SINFO_IF +#undef SWARNING_IF +#undef SDEBUG_IF +#undef SERROR_IF +#undef SFATAL_IF +#undef SQA_IF +#undef STRACE_IF +#undef SVERBOSE_IF +#undef SINFO_EVERY_N +#undef SWARNING_EVERY_N +#undef SDEBUG_EVERY_N +#undef SERROR_EVERY_N +#undef SFATAL_EVERY_N 
+#undef SQA_EVERY_N
+#undef STRACE_EVERY_N
+#undef SVERBOSE_EVERY_N
+// Normal logs
+#define SINFO CINFO("security")
+#define SWARNING CWARNING("security")
+#define SDEBUG CDEBUG("security")
+#define SERROR CERROR("security")
+#define SFATAL CFATAL("security")
+#define SQA CQA("security")
+#define STRACE CTRACE("security")
+#define SVERBOSE(level) CVERBOSE(level, "security")
+// Conditional logs
+#define SINFO_IF(condition) CINFO_IF(condition, "security")
+#define SWARNING_IF(condition) CWARNING_IF(condition, "security")
+#define SDEBUG_IF(condition) CDEBUG_IF(condition, "security")
+#define SERROR_IF(condition) CERROR_IF(condition, "security")
+#define SFATAL_IF(condition) CFATAL_IF(condition, "security")
+#define SQA_IF(condition) CQA_IF(condition, "security")
+#define STRACE_IF(condition) CTRACE_IF(condition, "security")
+#define SVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "security")
+// Interval logs
+#define SINFO_EVERY_N(n) CINFO_EVERY_N(n, "security")
+#define SWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "security")
+#define SDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "security")
+#define SERROR_EVERY_N(n) CERROR_EVERY_N(n, "security")
+#define SFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "security")
+#define SQA_EVERY_N(n) CQA_EVERY_N(n, "security")
+#define STRACE_EVERY_N(n) CTRACE_EVERY_N(n, "security")
+#define SVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "security")
+//
+// Default Loggers macro using C##LEVEL("performance")
+//
+// undef existing
+#undef PINFO
+#undef PWARNING
+#undef PDEBUG
+#undef PERROR
+#undef PFATAL
+#undef PQA
+#undef PTRACE
+#undef PVERBOSE
+#undef PINFO_IF
+#undef PWARNING_IF
+#undef PDEBUG_IF
+#undef PERROR_IF
+#undef PFATAL_IF
+#undef PQA_IF
+#undef PTRACE_IF
+#undef PVERBOSE_IF
+#undef PINFO_EVERY_N
+#undef PWARNING_EVERY_N
+#undef PDEBUG_EVERY_N
+#undef PERROR_EVERY_N
+#undef PFATAL_EVERY_N
+#undef PQA_EVERY_N
+#undef PTRACE_EVERY_N
+#undef PVERBOSE_EVERY_N
+// Normal logs
+#define PINFO CINFO("performance")
+#define PWARNING CWARNING("performance")
+#define PDEBUG CDEBUG("performance")
+#define PERROR CERROR("performance")
+#define PFATAL CFATAL("performance")
+#define PQA CQA("performance")
+#define PTRACE CTRACE("performance")
+#define PVERBOSE(level) CVERBOSE(level, "performance")
+// Conditional logs
+#define PINFO_IF(condition) CINFO_IF(condition, "performance")
+#define PWARNING_IF(condition) CWARNING_IF(condition, "performance")
+#define PDEBUG_IF(condition) CDEBUG_IF(condition, "performance")
+#define PERROR_IF(condition) CERROR_IF(condition, "performance")
+#define PFATAL_IF(condition) CFATAL_IF(condition, "performance")
+#define PQA_IF(condition) CQA_IF(condition, "performance")
+#define PTRACE_IF(condition) CTRACE_IF(condition, "performance")
+#define PVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "performance")
+// Interval logs
+#define PINFO_EVERY_N(n) CINFO_EVERY_N(n, "performance")
+#define PWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "performance")
+#define PDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "performance")
+#define PERROR_EVERY_N(n) CERROR_EVERY_N(n, "performance")
+#define PFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "performance")
+#define PQA_EVERY_N(n) CQA_EVERY_N(n, "performance")
+#define PTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "performance")
+#define PVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "performance")
+// Undefine macros that are not needed anymore
+#undef _ELPP_ASSEMBLY_SUPPORTED
+#undef _ELPP_STREAM
+#undef _ELPP_MUTEX_LOCK_GNU_ASM
+#undef _ELPP_MUTEX_UNLOCK_GNU_ASM
+#undef _ELPP_ENABLE_MUTEX
+#undef _ENABLE_EASYLOGGING
+#undef __EASYLOGGINGPP_SUPPRESS_UNSED
+#undef _ELPP_DEBUG_LOG
+#undef _ELPP_INFO_LOG
+#undef _ELPP_WARNING_LOG
+#undef _ELPP_ERROR_LOG
+#undef _ELPP_FATAL_LOG
+#undef _ELPP_QA_LOG
+#undef _ELPP_VERBOSE_LOG
+#undef _ELPP_TRACE_LOG
+#undef _INITIALIZE_EASYLOGGINGPP
+#undef _START_EASYLOGGINGPP
+#undef _ELPP_COUNTER
+#undef _ELPP_COUNTER_POSITION
+#define _INITIALIZE_EASYLOGGINGPP \
+    namespace easyloggingpp { \
+        namespace internal { \
+            ScopedPointer<RegisteredLoggers> registeredLoggers( \
+                new RegisteredLoggers()); \
+        } \
+    }
+#define _START_EASYLOGGINGPP(argc, argv) easyloggingpp::Loggers::setApplicationArguments(argc, argv);
+#define _ELPP_COUNTER easyloggingpp::internal::registeredLoggers->counters()->get(__FILE__, __LINE__)
+#define _ELPP_COUNTER_POSITION (_ELPP_COUNTER == NULL ? 0 : _ELPP_COUNTER->position())
+} // easyloggingpp
+#endif // EASYLOGGINGPP_H
diff --git a/error.cpp b/error.cpp
new file mode 100644
index 0000000..511c088
--- /dev/null
+++ b/error.cpp
@@ -0,0 +1,30 @@
+#include "error.h"
+
+namespace {
+// Prefix that what() reports in front of the caller-supplied message.
+const char kErrorPrefix[] = "Error: ";
+const std::string::size_type kErrorPrefixLength = sizeof(kErrorPrefix) - 1;
+}
+
+// The member stores the prefixed text, so what() can return a pointer that
+// stays valid for as long as the exception object itself.
+ErrorException::ErrorException(std::string msg) : msg(kErrorPrefix + msg) {}
+
+ErrorException::~ErrorException() throw() {}
+
+// Returns the message exactly as it was passed to the constructor.
+std::string ErrorException::getMessage() {
+    return msg.substr(kErrorPrefixLength);
+}
+
+// Returns "Error: " followed by the message. The pointer refers to the
+// member string; c_str() of a temporary built here would dangle as soon
+// as the function returned.
+const char *ErrorException::what() const throw () {
+    return msg.c_str();
+}
+
+// Throws an ErrorException carrying str.
+void error(std::string str) {
+    throw ErrorException(str);
+}
diff --git a/error.h b/error.h
new file mode 100644
index 0000000..2cb2d5b
--- /dev/null
+++ b/error.h
@@ -0,0 +1,57 @@
+/*
+ * File: error.h
+ * -------------
+ * This file defines the ErrorException class and the
+ * error function.
+ */
+
+#ifndef _error_h
+#define _error_h
+
+#include <string>
+#include <exception>
+
+/*
+ * Class: ErrorException
+ * ---------------------
+ * This exception is thrown by calls to the error
+ * function, which makes it possible for clients to respond to error
+ * conditions. Typical code for catching errors looks like this:
+ *
+ *
+ *    try {
+ *       . . . code in which an error might occur . . .
+ *    } catch (ErrorException & ex) {
+ *       . . . code to handle the error condition . . .
+ *    }
+ *
+ *
+ * If an ErrorException is thrown at any point in the
+ * range of the try (including in functions called from
+ * that code), control will jump immediately to the error handler.
+ */
+
+
+class ErrorException : public std::exception {
+public:
+    ErrorException(std::string msg);
+    virtual ~ErrorException() throw ();
+    virtual std::string getMessage();
+    virtual const char *what() const throw ();
+
+private:
+    std::string msg;
+};
+
+/*
+ * Function: error
+ * Usage: error(msg);
+ * ------------------
+ * Signals an error condition in a program by throwing an
+ * ErrorException with the specified message.
+ */
+
+void error(std::string str);
+
+
+#endif
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..cc08e90
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,301 @@
+// NOTE(review): the system include targets were missing from this copy of the
+// patch; the list below is rebuilt from the standard names this file uses.
+#include <algorithm>
+#include <cstdlib>
+#include <fstream>
+#include <getopt.h>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "error.h"
+#include "Deletion.h"
+#include "ClipReader.h"
+#include "BamStatCalculator.h"
+#include "Helper.h"
+#include "clip.h"
+#include "range.h"
+#include "Thirdparty/Timer.h"
+
+#include "easylogging++.h"
+
+//
+// Getopt
+//
+#define PROGRAM_NAME "sprites"
+#define PROGRAM_VERSION "1.0"
+#define PROGRAM_BUGREPORT "zhangz@csu.edu.cn"
+const int DEFAULT_MIN_OVERLAP=12;
+const int DEFAULT_MIN_MAPQUAL=1;
+const int DEFAULT_SD_CUTOFF=4;
+
+static const char *DFINDER_VERSION_MESSAGE =
+PROGRAM_NAME " Version " PROGRAM_VERSION "\n"
+"Written by Zhen Zhang.\n"
+"\n"
+"Copyright 2013 netlab.csu.edu.cn\n";
+
+// The defaults quoted in this text mirror the initial values in namespace opt
+// and the ".bedpe" suffix applied in parseOptions(); keep them in sync.
+static const char *DFINDER_USAGE_MESSAGE =
+"Usage: " PROGRAM_NAME " [OPTION] ... BAMFILE\n"
+"Find deletions from records in BAMFILE\n"
+"\n"
+" --help display this help and exit\n"
+" --version display version information and exit\n"
+" -v, --verbose display verbose output\n"
+" -r, --reffile=FILE read the reference sequence from FILE\n"
+" -o, --outfile=FILE write the deletion calls to FILE (default: PREFIX.bedpe, where PREFIX is derived from BAMFILE)\n"
+" -e, --error-rate=F the maximum error rate allowed between two sequences to consider them overlapped (default: 0.04)\n"
+" -m, --min-overlap=LEN minimum overlap required between two reads (default: 12)\n"
+" -q, --mapping-qual=MAPQ minimum mapping quality of a read (default: 1)\n"
+" -n, --allowed-num=SIZE a soft-clip is defined as valid, when the clipped part is not less than SIZE (default: 12)\n"
+" --enhanced-mode enable the enhanced mode, in which reads of type 2 are considered besides type 1\n"
+"\nThe following two options must appear together (if omitted, attempt to learn the mean and the standard deviation of insert size):\n"
+" -i, --insert-mean=N the mean of insert size\n"
+" -s, --insert-sd=N the standard deviation of insert size\n"
+"\nReport bugs to " PROGRAM_BUGREPORT "\n\n";
+
+namespace opt
+{
+    static unsigned int verbose;
+    static std::string bamFile;
+    static std::string refFile;
+    static std::string outFile;
+    static double errorRate = 0.04;
+    static int minOverlap = DEFAULT_MIN_OVERLAP;
+    static int minMapQual = DEFAULT_MIN_MAPQUAL;
+    static int allowedNum = 12;
+    static int mode = 0;
+
+    static bool bLearnInsert = true;
+    static int insertMean;
+    static int insertSd;
+}
+
+static const char* shortopts = "o:q:r:e:m:n:i:s:v";
+
+enum { OPT_HELP = 1, OPT_VERSION, OPT_ENHANCED_MODE };
+
+static const struct option longopts[] = {
+    { "verbose", no_argument, NULL, 'v' },
+    { "min-overlap", required_argument, NULL, 'm' },
+    { "mapping-qual", required_argument, NULL, 'q' },
+    { "allowed-num", required_argument, NULL, 'n' },
+    { "reffile", required_argument, NULL, 'r'},
+    { "outfile", required_argument, NULL, 'o' },
+    { "error-rate", required_argument, NULL, 'e' },
+    { "insert-mean", required_argument, NULL, 'i' },
+    { "insert-sd", required_argument, NULL, 's' },
+    { "help", no_argument, NULL, OPT_HELP },
+    { "version", no_argument, NULL, OPT_VERSION },
+    { "enhanced-mode", no_argument, NULL, OPT_ENHANCED_MODE },
+    { NULL, 0, NULL, 0 }
+};
+
+void parseOptions(int argc, char** argv);
+void output(const std::string& filename, const std::vector<Deletion>& dels);
+
+_INITIALIZE_EASYLOGGINGPP
+
+//
+// Main
+//
+// Parses the command line, learns the insert-size statistics from BAMFILE
+// unless both were given, asks every clip delivered by ClipReader for a
+// deletion call, de-duplicates and merges the calls, and writes them to
+// opt::outFile. Returns EXIT_FAILURE when error() aborts the run.
+//
+int main(int argc, char *argv[]) {
+    parseOptions(argc, argv);
+
+    try {
+        if (opt::bLearnInsert) {
+            std::cout << "Estimate the mean and standard deviation of insert size:" << std::endl;
+            BamStatCalculator calc(opt::bamFile);
+            opt::insertMean = calc.getInsertMean();
+            opt::insertSd = calc.getInsertSd();
+            std::cout << "Mean: " << opt::insertMean << std::endl;
+            std::cout << "Sd: " << opt::insertSd << std::endl;
+        }
+
+        ClipReader creader(opt::bamFile, opt::allowedNum, opt::mode, opt::minMapQual, opt::insertMean + DEFAULT_SD_CUTOFF * opt::insertSd);
+
+        BamTools::BamReader bamReader;
+        if (!bamReader.Open(opt::bamFile))
+            error("Could not open the input BAM file.");
+        if (!bamReader.LocateIndex())
+            error("Could not locate the index file");
+
+        FaidxWrapper faidx(opt::refFile);
+
+        int insLength = opt::insertMean + 3 * opt::insertSd;
+        double identityRate = 1.0 - opt::errorRate;
+
+        std::vector<Deletion> deletions;
+        {
+            // Scoped instead of new/delete so the timer is destroyed even
+            // when an exception leaves the block.
+            Timer timer("Calling deletions");
+            // NOTE(review): the clips returned by nextClip() are never
+            // deleted here; confirm in ClipReader who owns them.
+            while (AbstractClip *pClip = creader.nextClip()) {
+                try {
+                    auto del = pClip->call(bamReader, faidx, insLength, opt::minOverlap, identityRate, opt::minMapQual);
+                    deletions.push_back(del);
+                } catch (ErrorException& ex) {
+                    // A clip without a call is skipped on purpose; the
+                    // reason is reported only in verbose mode.
+                    if (opt::verbose > 0)
+                        std::cerr << ex.getMessage() << "\n";
+                }
+            }
+        }
+
+        if (deletions.empty()) {
+            std::cout << "No deletion was found." << std::endl;
+            return 0;
+        }
+
+        std::vector<Deletion> finalDels;
+        {
+            Timer timer("Merging deletions");
+            std::sort(deletions.begin(), deletions.end());
+            deletions.erase(std::unique(deletions.begin(), deletions.end()), deletions.end());
+            merge(deletions, finalDels,
+                  [](const Deletion& d1, const Deletion& d2){ return d1.overlaps(d2); });
+        }
+
+        output(opt::outFile, finalDels);
+    } catch (ErrorException& ex) {
+        // error() reports fatal conditions such as an unreadable BAM file.
+        std::cerr << PROGRAM_NAME ": " << ex.getMessage() << "\n";
+        return EXIT_FAILURE;
+    }
+
+    return 0;
+}
+
+//
+// Handle command line arguments
+//
+
+// Parses `text` as a T and stores the result in `value`. When the text is not
+// a valid T, or is followed by extra characters, prints a diagnostic naming
+// the option, leaves `value` untouched and sets `failed`.
+template <typename T>
+static void readOptionValue(const char* name, const char* text, T& value, bool& failed)
+{
+    std::istringstream in(text != NULL ? text : "");
+    T parsed;
+    char extra;
+    if (!(in >> parsed) || (in >> extra)) {
+        std::cerr << PROGRAM_NAME ": invalid value for --" << name << "\n";
+        failed = true;
+        return;
+    }
+    value = parsed;
+}
+
+// Fills namespace opt from the command line. Prints the usage text and exits
+// with EXIT_FAILURE when an argument is invalid or missing; --help and
+// --version print their text and exit with EXIT_SUCCESS.
+void parseOptions(int argc, char** argv)
+{
+    bool bInsertMean = false;
+    bool bInsertSd = false;
+    bool die = false;
+    // getopt_long() returns an int. Held in a char, the != -1 test is always
+    // true where plain char is unsigned, so the loop would never end.
+    for (int c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;)
+    {
+        switch (c)
+        {
+            case 'n': readOptionValue("allowed-num", optarg, opt::allowedNum, die); break;
+            case 'm': readOptionValue("min-overlap", optarg, opt::minOverlap, die); break;
+            case 'q': readOptionValue("mapping-qual", optarg, opt::minMapQual, die); break;
+            // File names are copied verbatim; stream extraction would cut
+            // them at the first whitespace character.
+            case 'r': opt::refFile = optarg; break;
+            case 'o': opt::outFile = optarg; break;
+            case 'e': readOptionValue("error-rate", optarg, opt::errorRate, die); break;
+            case '?': die = true; break;
+            case 'v': opt::verbose++; break;
+            case 'i': readOptionValue("insert-mean", optarg, opt::insertMean, die); bInsertMean = true; break;
+            case 's': readOptionValue("insert-sd", optarg, opt::insertSd, die); bInsertSd = true; break;
+            case OPT_ENHANCED_MODE: opt::mode = 1; break;
+            case OPT_HELP:
+                std::cout << DFINDER_USAGE_MESSAGE;
+                exit(EXIT_SUCCESS);
+            case OPT_VERSION:
+                std::cout << DFINDER_VERSION_MESSAGE;
+                exit(EXIT_SUCCESS);
+        }
+    }
+
+    if (argc - optind < 1)
+    {
+        std::cerr << PROGRAM_NAME ": missing arguments\n";
+        die = true;
+    }
+    else if (argc - optind > 1)
+    {
+        std::cerr << PROGRAM_NAME ": too many arguments\n";
+        die = true;
+    }
+
+    if (bInsertMean && bInsertSd) {
+        opt::bLearnInsert = false;
+    }
+
+    if (bInsertMean != bInsertSd) {
+        std::cerr << PROGRAM_NAME ": the mean and standard deviation of insert size must be specified together\n";
+        die = true;
+    }
+
+    if(opt::errorRate > 1.0)
+    {
+        std::cerr << PROGRAM_NAME ": invalid error-rate parameter: " << opt::errorRate << "\n";
+        die = true;
+    }
+
+    if(opt::refFile.empty())
+    {
+        std::cerr << PROGRAM_NAME ": the reference file must be specified\n";
+        die = true;
+    }
+
+    if (die)
+    {
+        std::cerr << "\n" << DFINDER_USAGE_MESSAGE;
+        exit(EXIT_FAILURE);
+    }
+
+    // Validate parameters
+    if(opt::errorRate <= 0)
+        opt::errorRate = 0.0;
+
+    // Parse the input filename
+    opt::bamFile = argv[optind++];
+
+    std::string out_prefix = stripFilename(opt::bamFile);
+    if(opt::outFile.empty())
+    {
+        opt::outFile = out_prefix + ".bedpe";
+    }
+}
+
+// Writes one line per deletion to `filename`: the deletion itself followed by
+// a tab and "DEL.<n>.<tag>", where n counts from 1.
+// Raises an ErrorException through error() when the file cannot be opened.
+void output(const std::string &filename, const std::vector<Deletion> &dels) {
+    std::ofstream out(filename.c_str());
+    if (!out)
+        error("Could not open the output file: " + filename);
+    std::size_t i = 1;
+    for (const Deletion &d : dels) {
+        out << d << "\tDEL." << i << "." << d.getFromTag() << '\n';
+        ++i;
+    }
+}
diff --git a/range.cpp b/range.cpp
new file mode 100644
index 0000000..7a2ec59
--- /dev/null
+++ b/range.cpp
@@ -0,0 +1,110 @@
+#include "range.h"
+// NOTE(review): the system include targets were missing from this copy of the
+// patch; the list below is rebuilt from the standard names this file uses.
+#include <algorithm>
+#include <numeric>
+#include <queue>
+#include <set>
+#include <vector>
+
+namespace {
+
+// Moves every id waiting in `pending` into one new cluster and records the
+// ids as used. Does nothing when `pending` is empty.
+void flushPending(std::queue<std::size_t> &pending, std::set<std::size_t> &usedIds,
+                  std::vector<IdCluster> &clusters)
+{
+    if (pending.empty()) return;
+    IdCluster clu;
+    while (!pending.empty()) {
+        clu.push_back(pending.front());
+        usedIds.insert(pending.front());
+        pending.pop();
+    }
+    clusters.push_back(clu);
+}
+
+} // namespace
+
+// Appends clusters of range indices to `clusters` by sweeping over the sorted
+// start/end points: ids are queued at their start point, and the queue is
+// emitted as one cluster at the first end point of a range not yet emitted.
+void clusterRanges(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters)
+{
+    std::vector<IRangeEndPoint> endPoints;
+    endPoints.reserve(2 * ranges.size());
+    for (std::size_t i = 0; i < ranges.size(); ++i) {
+        endPoints.push_back({ranges[i].start, i, true});
+        endPoints.push_back({ranges[i].end, i, false});
+    }
+    std::sort(endPoints.begin(), endPoints.end());
+
+    std::set<std::size_t> usedIds;
+    std::queue<std::size_t> pending;
+    for (const IRangeEndPoint &point : endPoints) {
+        if (point.isStart) {
+            pending.push(point.ownerId);
+        } else if (!usedIds.count(point.ownerId)) {
+            flushPending(pending, usedIds, clusters);
+        }
+    }
+    flushPending(pending, usedIds, clusters);
+}
+
+// Appends one cluster holding the consecutive indices [startIndex, endIndex).
+void append(std::size_t startIndex, std::size_t endIndex, std::vector<IdCluster> &clusters)
+{
+    IdCluster buffer(endIndex - startIndex);
+    std::iota(buffer.begin(), buffer.end(), startIndex);
+    clusters.push_back(buffer);
+}
+
+// Splits `ranges` into runs of consecutive elements in which each range
+// overlaps its predecessor, and appends the indices of every run to `clusters`.
+// An empty input adds nothing.
+void clusterRanges2(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters)
+{
+    // Without this guard the final append() would add an empty cluster.
+    if (ranges.empty()) return;
+
+    std::size_t startIndex = 0;
+    for (std::size_t i = 1; i < ranges.size(); ++i) {
+        if (!ranges[i-1].overlaps(ranges[i])) {
+            append(startIndex, i, clusters);
+            startIndex = i;
+        }
+    }
+    append(startIndex, ranges.size(), clusters);
+}
+
+// Number of positions covered when both `start` and `end` are counted.
+int IRange::length() const
+{
+    return end - start + 1;
+}
+
+// Orders ranges by start, then by end.
+bool IRange::operator<(const IRange &other) const
+{
+    if (start != other.start) return start < other.start;
+    return end < other.end;
+}
+
+// True when either range starts inside the other. `end` is treated as
+// exclusive here, so a range starting exactly at the other's end does not overlap.
+// NOTE(review): length() counts `end` as inclusive; confirm which is intended.
+bool IRange::overlaps(const IRange &other) const
+{
+    return (start >= other.start && start < other.end) ||
+           (other.start >= start && other.start < end);
+}
+
+// Orders end points by position. Ties are broken (start points first, then
+// by owner) so that sorting is deterministic and a range's start never
+// follows its own end.
+bool IRangeEndPoint::operator<(const IRangeEndPoint &other) const
+{
+    if (position != other.position) return position < other.position;
+    if (isStart != other.isStart) return isStart;
+    return ownerId < other.ownerId;
+}
diff --git a/range.h b/range.h
new file mode 100644
index 0000000..caebc1f
--- /dev/null
+++ b/range.h
@@ -0,0 +1,35 @@
+#ifndef RANGE_H
+#define RANGE_H
+
+// NOTE(review): include targets rebuilt from the names used below
+// (std::size_t, std::vector); they were missing from this copy of the patch.
+#include <cstddef>
+#include <vector>
+
+// Integer range with `start` and `end` coordinates.
+struct IRange {
+    int start;
+    int end;
+
+    int length() const;
+
+    bool operator<(const IRange &other) const;
+    bool overlaps(const IRange& other) const;
+};
+
+// One boundary of the range with index `ownerId`; `isStart` tells which one.
+struct IRangeEndPoint {
+    int position;
+    std::size_t ownerId;
+    bool isStart;
+
+    bool operator<(const IRangeEndPoint &other) const;
+};
+
+// Indices into the vector of ranges handed to the clustering functions.
+typedef std::vector<std::size_t> IdCluster;
+
+void clusterRanges(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters);
+void clusterRanges2(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters);
+
+#endif // RANGE_H