diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..49347c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +AllTests +dfinder +*.pyc +*.bam +*.o +*~ +result* +output +CMakeLists.txt.user +build/ diff --git a/BamStatCalculator.cpp b/BamStatCalculator.cpp new file mode 100644 index 0000000..0e014e1 --- /dev/null +++ b/BamStatCalculator.cpp @@ -0,0 +1,70 @@ +#include "BamStatCalculator.h" +#include "error.h" + +#include +#include +#include + +using namespace std; +using namespace BamTools; + +BamStatCalculator::BamStatCalculator(const string &filename) : + insertMean(-1), insertSd(-1) +{ + if (!reader.Open(filename)) + error("Could not open the input BAM file."); + loadInserts(); +} + +BamStatCalculator::~BamStatCalculator() +{ + reader.Close(); +} + +int BamStatCalculator::getInsertMean() +{ + if (insertMean == -1) { + insertMean = mean(); + } + return insertMean; +} + +int BamStatCalculator::getInsertSd() +{ + if (insertSd == -1) { + insertSd = sd(); + } + return insertSd; +} + +void BamStatCalculator::loadInserts() +{ + BamAlignment al; + size_t cnt = 0; + while (reader.GetNextAlignmentCore(al) && cnt < 10000) + { + if (al.IsProperPair() && al.MatePosition > al.Position) + { + uint64_t insert = al.MatePosition + al.Length - al.Position; + if (insert < 10000) { + inserts.push_back(insert); + cnt++; + } + } + } +} + +int BamStatCalculator::mean() +{ + return accumulate(inserts.begin(), inserts.end(), 0) / inserts.size(); +} + + +int BamStatCalculator::sd() +{ + int m = getInsertMean(); + vector temp; + transform(inserts.begin(), inserts.end(), back_inserter(temp), [](int x) { return x*x; }); + uint32_t sum = accumulate(temp.begin(), temp.end(), 0); + return sqrt( sum / temp.size() - m * m); +} diff --git a/BamStatCalculator.h b/BamStatCalculator.h new file mode 100644 index 0000000..dbc40e7 --- /dev/null +++ b/BamStatCalculator.h @@ -0,0 +1,28 @@ +#ifndef BAMSTATCALCULATOR_H +#define BAMSTATCALCULATOR_H + +#include "api/BamReader.h" +#include 
// Estimates the mean and standard deviation of the insert size from a
// sample of alignments read from a BAM file.
class BamStatCalculator
{
public:
    // Opens `filename` with the BamTools reader (calling error() if the file
    // cannot be opened) and immediately loads the insert-size sample.
    BamStatCalculator(const std::string& filename);
    // Closes the underlying reader.
    virtual ~BamStatCalculator();

    // Mean insert size of the sample; computed on first use and cached.
    int getInsertMean();
    // Standard deviation of the sample; computed on first use and cached.
    int getInsertSd();

private:
    // Fills `inserts` from proper pairs whose mate lies downstream, keeping
    // only inserts below 10000 and stopping after 10000 samples.
    void loadInserts();
    // Uncached computations behind getInsertMean() / getInsertSd().
    int mean();
    int sd();

    BamTools::BamReader reader;
    // Sampled insert sizes.
    // NOTE(review): the element type is not visible in this view of the file.
    std::vector inserts;
    // Cached results; -1 means "not computed yet".
    int insertMean;
    int insertSd;
};
+} + +bool ClipReader::setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition) +{ + return reader.SetRegion(leftRefId, leftPosition, rightRefId, rightPosition); +} + +int ClipReader::getReferenceId(const string &referenceName) +{ + return reader.GetReferenceID(referenceName); +} + +string ClipReader::getReferenceName(int referenceId) +{ + assert(referenceId >= 0 && referenceId < reader.GetReferenceCount()); + return reader.GetReferenceData()[referenceId].RefName; +} + +AbstractClip *ClipReader::nextClip() { + BamAlignment al; + while (reader.GetNextAlignment(al)) { + vector clipSizes, readPositions, genomePositions; +// if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + if (al.MapQuality < minMapQual || !al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + int size = clipSizes.size(); + + if (al.IsProperPair()) { + if (!al.IsReverseStrand() && al.Position == genomePositions[0] && + clipSizes[0] >= allowedNum && + (size == 1 || + (size == 2 && clipSizes[1] <= 5))) { + return new ForwardBClip(al.RefID, + al.Position + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] && + clipSizes[size - 1] >= allowedNum && + (size == 1 || + (size == 2 && clipSizes[0] <= 5))) { + return new ReverseEClip(al.RefID, + al.Position + 1, + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + } + + if (inEnhancedMode()) { + if (al.RefID != al.MateRefID || abs(al.InsertSize) <= isizeCutoff) + continue; + if ((al.AlignmentFlag == 161 || al.AlignmentFlag == 97) && al.Position < al.MatePosition && + clipSizes[size - 1] >= allowedNum && + (size == 1 || (size == 2 && clipSizes[0] <= 5))) { + return new ForwardEClip(al.RefID, + al.Position + 1, + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + if ((al.AlignmentFlag == 81 || 
al.AlignmentFlag == 145) && al.Position > al.MatePosition && + clipSizes[0] >= allowedNum && + (size == 1 || (size == 2 && clipSizes[1] <= 5))) { + return new ReverseBClip(al.RefID, + al.Position + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.QueryBases, + al.CigarData); + } + } + + } + return NULL; +} + +bool ClipReader::inEnhancedMode() const +{ + return mode == 1; +} diff --git a/ClipReader.h b/ClipReader.h new file mode 100644 index 0000000..c22fed6 --- /dev/null +++ b/ClipReader.h @@ -0,0 +1,32 @@ +#ifndef CLIPREADER_H +#define CLIPREADER_H + +#include "clip.h" + +class ClipReader +{ +public: + // 0 indicates the standard mode and 1 indicates the enhanced mode, which reads reads of type 2 besides type 1 + ClipReader(const std::string& filename, int allowedNum, int mode, int minMapQual, int isizeCutoff); + virtual ~ClipReader(); + + bool setRegion(int leftRefId, int leftPosition, int rightRefId, int rightPosition); + + int getReferenceId(const std::string& referenceName); + std::string getReferenceName(int referenceId); + + int getAllowedNum() const; + + AbstractClip* nextClip(); + +private: + BamTools::BamReader reader; + int allowedNum; + int mode; + int minMapQual; + int isizeCutoff; + + bool inEnhancedMode() const; +}; + +#endif // CLIPREADER_H diff --git a/Deletion.cpp b/Deletion.cpp new file mode 100644 index 0000000..4b20325 --- /dev/null +++ b/Deletion.cpp @@ -0,0 +1,71 @@ +#include "Deletion.h" +#include "Helper.h" +#include +#include + +using namespace std; + +Deletion::Deletion(const string &referenceName, + int start1, + int end1, + int start2, + int end2, + int length, + const string& fromTag) : + referenceName(referenceName), + start1(start1), + end1(end1), + start2(start2), + end2(end2), + length(length), + fromTag(fromTag) { + assert(checkRep()); +} + +Deletion::~Deletion() { +} + +string Deletion::toBedpe() const { + stringstream fmt; + fmt << referenceName << "\t" << start1 - 1 << "\t" << end1 << "\t" + << referenceName << "\t" << 
start2 - 1 << "\t" << end2; + return fmt.str(); +} + +bool Deletion::overlaps(const Deletion &other) const +{ + if (referenceName != other.referenceName) return false; + return ((start1-1 >= other.start1-1 && start1-1 <= other.end1) || + (other.start1-1 >= start1-1 && other.start1-1 <= end1)) && + ((start2-1 >= other.start2-1 && start2-1 <= other.end2) || + (other.start2-1 >= start2-1 && other.start2-1 <= end2)); +} + +bool Deletion::operator<(const Deletion &other) const +{ + if (referenceName != other.referenceName) return referenceName < other.referenceName; + if (start1 != other.start1) return start1 < other.start1; + if (start2 != other.start2) return start2 < other.start2; + if (end1 != other.end1) return end1 < other.end1; + return end2 < other.end2; +} + +bool Deletion::operator==(const Deletion &other) const +{ + return referenceName == other.referenceName && + start1 == other.start1 && start2 == other.start2 && + end1 == other.end1 && end2 == other.end2; +} + +std::ostream& operator <<(ostream &stream, const Deletion &del) +{ + stream << del.toBedpe(); + return stream; +} + +bool Deletion::checkRep() const +{ + return (start1 <= end1) && + (start2 <= end2) && + (length <= Helper::SVLEN_THRESHOLD); +} diff --git a/Deletion.h b/Deletion.h new file mode 100644 index 0000000..0c1504d --- /dev/null +++ b/Deletion.h @@ -0,0 +1,54 @@ +#ifndef _DELETION_H_ +#define _DELETION_H_ + +#include +#include + +class Deletion { +public: + Deletion(const std::string& referenceName, + int start1, + int end1, + int start2, + int end2, + int length, + const std::string& fromTag); + + virtual ~Deletion(); + + std::string getReferenceName() const { return referenceName; } + + int getStart1() const { return start1; } + + int getEnd1() const { return end1; } + + int getStart2() const { return start2; } + + int getEnd2() const { return end2; } + + int getLength() const { return length; } + + std::string getFromTag() const { return fromTag; } + + std::string toBedpe() const; + + 
friend std::ostream& operator <<(std::ostream& stream, const Deletion& del); + + bool overlaps(const Deletion &other) const; + bool operator<(const Deletion &other) const; + bool operator==(const Deletion &other) const; + +private: + std::string referenceName; + int start1; + int end1; + int start2; + int end2; + int length; + std::string fromTag; + + bool checkRep() const; + +}; + +#endif /* _DELETION_H_ */ diff --git a/FaidxWrapper.cpp b/FaidxWrapper.cpp new file mode 100644 index 0000000..c7167f8 --- /dev/null +++ b/FaidxWrapper.cpp @@ -0,0 +1,31 @@ +#include "FaidxWrapper.h" +#include "error.h" +#include + +using namespace std; + +FaidxWrapper::FaidxWrapper(const std::string &fasta) +{ + fai = fai_load(fasta.c_str()); + if (fai == NULL) error("Cannot load the indexed fasta."); +} + +FaidxWrapper::~FaidxWrapper() +{ + if (fai != NULL) fai_destroy(fai); +} + +int FaidxWrapper::size() +{ + return faidx_nseq(fai); +} + +string FaidxWrapper::fetch(const string &chrom, int start, int end) +{ + int len; + char *s = faidx_fetch_seq(fai, (char *)chrom.c_str(), start - 1, end - 1, &len); + if (s == NULL) error("cannot fetch the reference sequence"); + string str(s); + transform(str.begin(), str.end(), str.begin(), ::toupper); + return str; +} diff --git a/FaidxWrapper.h b/FaidxWrapper.h new file mode 100644 index 0000000..fbe2541 --- /dev/null +++ b/FaidxWrapper.h @@ -0,0 +1,19 @@ +#ifndef FAIDXWRAPPER_H +#define FAIDXWRAPPER_H + +#include "htslib/faidx.h" +#include + +class FaidxWrapper +{ +public: + FaidxWrapper(const std::string& fasta); + virtual ~FaidxWrapper(); + int size(); + std::string fetch(const std::string& chrom, int start, int end); + +private: + faidx_t *fai; +}; + +#endif // FAIDXWRAPPER_H diff --git a/Helper.cpp b/Helper.cpp new file mode 100644 index 0000000..1a1b6af --- /dev/null +++ b/Helper.cpp @@ -0,0 +1,56 @@ +#include "Helper.h" + +using namespace std; + +// Strip the leading directories and +// the last trailling suffix from a filename +string 
// Length of the longest common prefix of two equally long strings.
//
// The previous version fell through to `return 0` when no mismatch was
// found, so two identical strings reported a common prefix of 0 instead of
// their full length. It also compared a signed index with size().
int numOfTheLongestPrefix(const std::string &s1, const std::string &s2)
{
    assert(s1.size() == s2.size());
    const std::string::size_type n = s1.size();
    std::string::size_type matched = 0;
    while (matched < n && s1[matched] == s2[matched]) {
        ++matched;
    }
    return static_cast<int>(matched);
}


// Length of the longest common suffix of two equally long strings.
//
// Same fix as numOfTheLongestPrefix(): identical strings now yield their
// full length rather than 0.
int numOfThelongestSuffix(const std::string &s1, const std::string &s2)
{
    assert(s1.size() == s2.size());
    const std::string::size_type n = s1.size();
    std::string::size_type matched = 0;
    // Walk backwards from the last character of both strings.
    while (matched < n && s1[n - 1 - matched] == s2[n - 1 - matched]) {
        ++matched;
    }
    return static_cast<int>(matched);
}
// Appends to `results` every element of `orig` that is not absorbed by an
// earlier element, preserving the original order.
//
// Element j is dropped when comp(orig[i], orig[j]) is true for some i < j.
// As before, an element that has itself been dropped still takes part as
// `orig[i]` in later comparisons. `results` is appended to, not cleared.
//
// The outer bound is written as `i + 1 < size` because the former
// `i < size - 1` underflowed for an empty input (size_t 0 - 1), sending the
// loop through ~2^64 iterations.
template <typename T, typename Compare>
void merge(const std::vector<T>& orig, std::vector<T>& results, Compare comp) {
    std::vector<bool> removed(orig.size(), false);

    for (size_t i = 0; i + 1 < orig.size(); ++i) {
        for (size_t j = i + 1; j < orig.size(); ++j) {
            if (!removed[j] && comp(orig[i], orig[j])) {
                removed[j] = true;
            }
        }
    }

    for (size_t i = 0; i < removed.size(); ++i) {
        if (!removed[i]) {
            results.push_back(orig[i]);
        }
    }
}
+ +##Pre-built binaries +You can download the pre-built binaries from the [Releases page](https://github.com/zhangzhen/sprites/releases) or the links below: +- Linux 64bit: [sprites\_Linux64](https://github.com/zhangzhen/sprites/releases/download/v0.3.0/sprites\_Linux64) +- OS X: [sprites\_OSX](https://github.com/zhangzhen/sprites/releases/download/v0.3.0/sprites\_OSX) + +##Installation + +#### Requirements +- HTSlib ([http://www.htslib.org/](http://www.htslib.org/)) +- BamTools ([https://github.com/pezmaster31/bamtools](https://github.com/pezmaster31/bamtools)) +- CMake ([http://www.cmake.org](http://www.cmake.org)) + +#### Building Sprites +``` +git clone https://github.com/zhangzhen/sprites.git +cd sprites +export BAMTOOLS_HOME=/path/to/bamtools +export HTSLIB_HOME=/path/to/htslib +mkdir build +cd build +cmake .. +make +cp sprites /usr/local/bin/ +``` +##Usage +``` +sprites [options] sample.bam +``` +The input bam file is required to be sorted. + +**Options** +``` +-r FILE +``` diff --git a/SoftClipReader.cpp b/SoftClipReader.cpp new file mode 100644 index 0000000..a2f6114 --- /dev/null +++ b/SoftClipReader.cpp @@ -0,0 +1,105 @@ +#include "SoftClipReader.h" +#include "error.h" + +#include + +using namespace std; +using namespace BamTools; + +SoftClipReader::SoftClipReader(const string &filename, int minClip, int mode) : + minClip(minClip), mode(mode) { + if (!reader.Open(filename)) + error("Could not open the input BAM file."); + if (!reader.LocateIndex()) + error("Could not locate the index file"); +} + +SoftClipReader::~SoftClipReader() { + reader.Close(); +} + +int SoftClipReader::getReferenceId(const string &referenceName) { + return reader.GetReferenceID(referenceName); +} + +bool SoftClipReader::getSoftClip(SoftClip &clip) { + BamAlignment al; + while (reader.GetNextAlignment(al)) { + vector clipSizes, readPositions, genomePositions; + if (!al.GetSoftClips(clipSizes, readPositions, genomePositions)) continue; + int size = clipSizes.size(); + + if 
(inEnhancedMode()) { + if (!al.IsReverseStrand() && al.IsMateReverseStrand() && al.Position < al.MatePosition && + al.Position != genomePositions[size - 1] && clipSizes[size - 1] > minClip && + (size == 1 || (size == 2 && clipSizes[0] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - ((size == 2) ? clipSizes[0] : 0), + genomePositions[size - 1] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[size - 1], + al.QueryBases); + return true; + } + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.Position > al.MatePosition && + al.Position == genomePositions[0] && clipSizes[0] > minClip && + (size == 1 || (size == 2 && clipSizes[1] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - clipSizes[0], + genomePositions[0] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[0], + al.QueryBases); + return true; + } + } else if (al.IsProperPair()) { + if (!al.IsReverseStrand() && al.Position == genomePositions[0] && + clipSizes[0] > minClip && + (size == 1 || + (size == 2 && clipSizes[1] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position - clipSizes[0] + 1, + genomePositions[0] + 1, + al.MatePosition + 1, + al.IsReverseStrand(), + al.IsMateReverseStrand(), + clipSizes[0], + al.QueryBases); + return true; + } + if (al.IsReverseStrand() && al.Position != genomePositions[size - 1] && + clipSizes[size - 1] > minClip && + (size == 1 || + (size == 2 && clipSizes[0] <= minClip))) { + clip = SoftClip(al.RefID, + al.Position + 1, + al.Position + 1 - ((size == 2) ? 
// Scope timer: records wall-clock and CPU time at construction (or at the
// last reset()) and, unless silenced, reports both to stderr on destruction.
class Timer
{
    public:
        // `s` is the label printed in the report; `silent` suppresses it.
        Timer(std::string s, bool silent = false) : m_desc(s), m_silent(silent)
        {
            reset();
        }

        ~Timer()
        {
            if(m_silent)
                return;
            fprintf(stderr, "[timer - %s] wall clock: %.2lfs CPU: %.2lfs\n", m_desc.c_str(), getElapsedWallTime(), getElapsedCPUTime());
        }

        // Seconds of wall-clock time elapsed since the last reset().
        double getElapsedWallTime() const
        {
            timeval current;
            gettimeofday(&current, NULL);
            const double microPart = double(current.tv_usec - m_wallStart.tv_usec) / 1000000;
            return (current.tv_sec - m_wallStart.tv_sec) + microPart;
        }

        // Seconds of CPU time, as reported by clock(), since the last reset().
        double getElapsedCPUTime() const
        {
            const double current = clock();
            return (current - m_cpuStart) / CLOCKS_PER_SEC;
        }

        // Restarts both measurements from the current instant.
        void reset()
        {
            gettimeofday(&m_wallStart, NULL);
            m_cpuStart = clock();
        }

    private:
        std::string m_desc;

        // Starting points of the wall-clock and CPU measurements
        timeval m_wallStart;
        double m_cpuStart;

        bool m_silent;
};
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// ------------------------------------------------------------------------------ +#include "overlapper.h" +#include "../error.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +OverlapperParams default_params = { 2, -6, -3 }; +OverlapperParams ungapped_params = { 2, -10000, -3 }; + + + + +// +#define max3(x,y,z) std::max(std::max(x,y), z) +//#define DEBUG_OVERLAPPER 1 +//#define DEBUG_EXTEND 1 + + +// +SequenceInterval::SequenceInterval() : start(0), end(-1) +{ + +} + +SequenceOverlap::SequenceOverlap() +{ + length[0] = length[1] = 0; + score = -1; + edit_distance = -1; + total_columns = -1; +} + +// +bool SequenceOverlap::isValid() const +{ + return !cigar.empty() && match[0].isValid() && match[1].isValid(); +} + +bool SequenceOverlap::isQualified(int minOverlap, double minIdentity) const +{ + if (getOverlapLength() >= minOverlap && + getPercentIdentity() >= minIdentity * 100) { + return true; + } + return false; +} + +// +double SequenceOverlap::getPercentIdentity() const +{ + return (double)(total_columns - edit_distance) * 100.0f / total_columns; +} + +// +std::ostream& operator<<(std::ostream& out, const SequenceOverlap& overlap) +{ + out << "[" << overlap.match[0].start << " " << overlap.match[0].end << "] "; + out << "[" << overlap.match[1].start << " " << overlap.match[1].end << "] "; + out << "C:" << overlap.cigar; + return out; +} + +void SequenceOverlap::makePaddedMatches(const std::string& s1, const std::string& s2, + std::string* p1, std::string* p2) const +{ + assert(isValid() && p1 != NULL && p2 != NULL); + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = 
match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + p1->append(s1.substr(current_1, length)); + p2->append(s2.substr(current_2, length)); + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + p1->append(s1.substr(current_1, length)); + p2->append(length, '-'); + current_1 += length; + } + else if(code == 'I') { + p1->append(length, '-'); + p2->append(s2.substr(current_2, length)); + current_2 += length; + } + length = -1; + } +} + +// +int SequenceOverlap::calculateEditDistance(const std::string& s1, const std::string& s2) const +{ + // Recalculate the edit distance between the pair of strings, given this alignment + int new_edit_distance = 0; + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + for(int i = 0; i < length; ++i) { + if(s1[current_1 + i] != s2[current_2 + i]) + new_edit_distance++; + } + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + new_edit_distance += length; + current_1 += length; + } + else if(code == 'I') { + new_edit_distance += length; + current_2 += length; + } + length = -1; + } + + return new_edit_distance; +} + +// +int SequenceOverlap::calculateTotalColumns() const +{ + // Recalculate the edit distance between the pair of strings, given this alignment + int total_columns = 0; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + total_columns += length; + } + + return total_columns; +} + +// +void SequenceOverlap::printAlignment(const std::string& s1, const std::string& s2) const +{ + assert(isValid()); + + std::string out_1; + std::string 
out_2; + + // Print out the initial part of the strings, which do not match. + // Typically this is the overhanging portion of one of the strings. + std::string leader_1 = s1.substr(0, match[0].start); + std::string leader_2 = s2.substr(0, match[1].start); + + // Pad the beginning of the output strings with spaces to align + if(leader_1.size() < leader_2.size()) + out_1.append(leader_2.size() - leader_1.size(), ' '); + + if(leader_2.size() < leader_1.size()) + out_2.append(leader_1.size() - leader_2.size(), ' '); + + out_1.append(leader_1); + out_2.append(leader_2); + + // Process the matching region using the cigar operations + size_t current_1 = match[0].start; + size_t current_2 = match[1].start; + + std::stringstream cigar_parser(cigar); + int length = -1; + char code; + while(cigar_parser >> length >> code) { + assert(length > 0); + if(code == 'M') { + out_1.append(s1.substr(current_1, length)); + out_2.append(s2.substr(current_2, length)); + current_1 += length; + current_2 += length; + } + else if(code == 'D') { + out_1.append(s1.substr(current_1, length)); + out_2.append(length, '-'); + current_1 += length; + } + else if(code == 'I') { + out_1.append(length, '-'); + out_2.append(s2.substr(current_2, length)); + current_2 += length; + } + length = -1; + } + + // Append the remainder of each string + out_1.append(s1.substr(current_1)); + out_2.append(s2.substr(current_2)); + + // Print the output strings and split long lines + int MAX_COLUMNS = 120; + size_t total_columns = std::max(out_1.size(), out_2.size()); + for(size_t i = 0; i < total_columns; i += MAX_COLUMNS) { + std::string sub_1; + std::string sub_2; + if(i < out_1.size()) + sub_1 = out_1.substr(i, MAX_COLUMNS); + if(i < out_2.size()) + sub_2 = out_2.substr(i, MAX_COLUMNS); + + std::cout << "S1\t" << sub_1 << "\n"; + std::cout << "S2\t" << sub_2 << "\n"; + std::cout << "\n"; + } + std::cout << "Cigar: " << cigar << "\n"; + std::cout << "Score: " << score << "\n"; + + printf("Identity: %2.2lf\n", 
// Computes the best-scoring overlap between s1 and s2 by dynamic programming.
//
// A (|s1|+1) x (|s2|+1) score matrix is filled using params.match_score,
// params.mismatch_penalty and params.gap_penalty; row 0 and column 0 keep the
// value resize() gives them. The best cell of the last row/last column is
// chosen as the end of the overlap and the path is traced back from there
// until it reaches row 0 or column 0.
//
// Returns a SequenceOverlap holding the matched intervals of both strings,
// the score, the edit distance, the number of alignment columns and the
// compacted cigar string. Terminates the process if either input is empty.
SequenceOverlap Overlapper::computeOverlap(const std::string& s1, const std::string& s2, const OverlapperParams params)
{
    // Abort the whole process if either string is zero length
    SequenceOverlap output;
    if(s1.empty() || s2.empty()) {
        std::cerr << "Overlapper::computeOverlap error: empty input sequence\n";
        exit(EXIT_FAILURE);
    }

    // Initialize the scoring matrix; the first index runs over s1, the second over s2
    size_t num_columns = s1.size() + 1;
    size_t num_rows = s2.size() + 1;

    // NOTE(review): DPCells/DPMatrix element types are not visible in this
    // view; presumably int cells that resize() value-initializes to 0 - confirm.
    DPMatrix score_matrix;
    score_matrix.resize(num_columns);
    for(size_t i = 0; i < score_matrix.size(); ++i)
        score_matrix[i].resize(num_rows);

    // Calculate scores
    for(size_t i = 1; i < num_columns; ++i) {
        for(size_t j = 1; j < num_rows; ++j) {
            // Calculate the score for entry (i,j)
            int idx_1 = i - 1;
            int idx_2 = j - 1;
            int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty);
            int up = score_matrix[i][j-1] + params.gap_penalty;
            int left = score_matrix[i-1][j] + params.gap_penalty;

            score_matrix[i][j] = max3(diagonal, up, left);
        }
    }

    // The location of the highest scoring match in the
    // last row or last column is the maximum scoring overlap
    // for the pair of strings. We start the backtracking from
    // that cell
    int max_row_value = std::numeric_limits::min();
    int max_column_value = std::numeric_limits::min();
    size_t max_row_index = 0;
    size_t max_column_index = 0;

    // Check every column of the last row
    // The first column is skipped to avoid empty alignments
    for(size_t i = 1; i < num_columns; ++i) {
        int v = score_matrix[i][num_rows - 1];
        if(score_matrix[i][num_rows - 1] > max_row_value) {
            max_row_value = v;
            max_row_index = i;
        }
    }

    // Check every row of the last column
    for(size_t j = 1; j < num_rows; ++j) {
        int v = score_matrix[num_columns - 1][j];
        if(v > max_column_value) {
            max_column_value = v;
            max_column_index = j;
        }
    }

    // Compute the location at which to start the backtrack
    size_t i;
    size_t j;

    // On a tie the cell found in the last row (j == num_rows - 1) wins
    if(max_column_value > max_row_value) {
        i = num_columns - 1;
        j = max_column_index;
        output.score = max_column_value;
    }
    else {
        i = max_row_index;
        j = num_rows - 1;
        output.score = max_row_value;
    }

    // Set the alignment endpoints to be the index of the last aligned base
    output.match[0].end = i - 1;
    output.match[1].end = j - 1;
    output.length[0] = s1.length();
    output.length[1] = s2.length();
#ifdef DEBUG_OVERLAPPER
    printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score);
#endif

    output.edit_distance = 0;
    output.total_columns = 0;

    // Expanded cigar, one character per alignment column, built back to front
    std::string cigar;
    while(i > 0 && j > 0) {
        // Compute the possible previous locations of the path
        int idx_1 = i - 1;
        int idx_2 = j - 1;

        bool is_match = s1[idx_1] == s2[idx_2];
        int diagonal = score_matrix[i - 1][j - 1] + (is_match ? params.match_score : params.mismatch_penalty);
        int up = score_matrix[i][j-1] + params.gap_penalty;
        int left = score_matrix[i-1][j] + params.gap_penalty;

        // If there are multiple possible paths to this cell
        // we break ties in order of insertion,deletion,match
        // this helps left-justify matches for homopolymer runs
        // of unequal lengths
        if(score_matrix[i][j] == up) {
            // Consumes a base of s2 only
            cigar.push_back('I');
            j -= 1;
            output.edit_distance += 1;
        } else if(score_matrix[i][j] == left) {
            // Consumes a base of s1 only
            cigar.push_back('D');
            i -= 1;
            output.edit_distance += 1;
        } else {
            // Consumes a base of both strings; mismatches still count as edits
            assert(score_matrix[i][j] == diagonal);
            if(!is_match)
                output.edit_distance += 1;
            cigar.push_back('M');
            i -= 1;
            j -= 1;
        }

        output.total_columns += 1;
    }

    // Set the alignment startpoints
    output.match[0].start = i;
    output.match[1].start = j;

    // Compact the expanded cigar string into the canonical run length encoding
    // The backtracking produces a cigar string in reversed order, flip it
    std::reverse(cigar.begin(), cigar.end());
    assert(!cigar.empty());
    output.cigar = compactCigar(cigar);
    return output;
}
j - 1; + int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; + int left = score_matrix[i-1][j] + gap_pen; + + score_matrix[i][j] = max3(diagonal, up, left); + } + } + + int max_row_value; + size_t max_row_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(size_t i = num_columns - 1; i > 0; --i) { + int left = score_matrix[i-1][num_rows - 1]; + if (score_matrix[i][num_rows - 1] != left) { + max_row_index = i; + max_row_value = score_matrix[i][num_rows - 1]; + break; + } + } + + // Compute the location at which to start the backtrack + size_t i = max_row_index; + size_t j = num_rows - 1; + output.score = max_row_value; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(j > 0 && i > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int gap_pen = (j == num_rows - 1) ? 
0 : params.gap_penalty; + int left = score_matrix[i-1][j] + gap_pen; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +SequenceOverlap Overlapper::alignSuffix(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlapSW error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + int diagonal = score_matrix[i-1][j-1] + (s1[idx_1] == s2[idx_2] ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + score_matrix[i][j] = std::max(0, max3(diagonal, up, left)); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. We start the backtracking from + // that cell + int max_value = std::numeric_limits::min(); + size_t max_row_index = num_rows - 1; + size_t max_column_index = 0; + + for (size_t i =1; i < num_columns; ++i) { + if (score_matrix[i][max_row_index] > max_value) { + max_value = score_matrix[i][max_row_index]; + max_column_index = i; + } + } + + // Compute the location at which to start the backtrack + size_t i = max_column_index; + size_t j = max_row_index; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0 && score_matrix[i][j] > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +SequenceOverlap Overlapper::computeOverlapSW2(const std::string& s1, const std::string& s2, int minOverlap, double minIdentity, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlapSW error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; + + DPMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + int diagonal = score_matrix[i-1][j-1] + 
(s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; +// int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; +// int left = score_matrix[i-1][j] + gap_pen; + int left = score_matrix[i-1][j] + params.gap_penalty; + + score_matrix[i][j] = std::max(0, max3(diagonal, up, left)); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. We start the backtracking from + // that cell + + std::vector last_row_indexes(num_columns - 1); + for (size_t i = 1; i < num_columns; ++i) { + last_row_indexes[i-1] = i; + } + std::sort(last_row_indexes.begin(), last_row_indexes.end(), + [&score_matrix, num_rows](size_t i1, size_t i2) {return score_matrix[i1][num_rows - 1] > score_matrix[i2][num_rows - 1];}); + + int cnt = 0; + for (auto max_row_index: last_row_indexes) { + if (cnt >= 10) break; + auto max_row_value = score_matrix[max_row_index][num_rows - 1]; + + // Compute the location at which to start the backtrack + size_t i = max_row_index; + size_t j = num_rows - 1; + output.score = max_row_value; + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); + #ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); + #endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0 && score_matrix[i][j] > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1] + (is_match ? 
params.match_score : params.mismatch_penalty); + int up = score_matrix[i][j-1] + params.gap_penalty; + int left = score_matrix[i-1][j] + params.gap_penalty; +// int gap_pen = (j == num_rows - 1) ? 0 : params.gap_penalty; +// int left = score_matrix[i-1][j] + gap_pen; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(score_matrix[i][j] == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(score_matrix[i][j] == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(score_matrix[i][j] == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + +// std::string s0 = "CTGCCCCAAATACAGCTACTGCCACCACCAAGGCGGCTGTTGGTGCCCTGCAGTCAACAGCCAGTCTCTTCGTGGTCTCACTCTCTCTTCTACATCTCTCC"; +// std::reverse(s0.begin(), s0.end()); + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + if (cigar.empty()) continue; + std::reverse(cigar.begin(), cigar.end()); + output.cigar = compactCigar(cigar); + +// if (s2 == s0) +// output.printAlignment(s1, s2); + + if (output.isQualified(minOverlap, minIdentity)) + return output; + + cnt++; + } + error("No overlap was found."); +} + +// Returns the index into a cell vector for for the ith column and jth row +// of a dynamic programming matrix. The band_origin gives the row in first +// column of the matrix that the bands start at. This is used to calculate +// the starting band row for each column. 
+inline int _getBandedCellIndex(int i, int j, int band_width, int band_origin_row) +{ + int band_start = band_origin_row + i; + int band_row_index = j - band_start; + return (band_row_index >= 0 && band_row_index < band_width) ? i * band_width + band_row_index : -1; +} + +// Returns the score for (i,j) in the +inline int _getBandedCellScore(const DPCells& cells, int i, int j, int band_width, int band_origin_row, int invalid_score) +{ + int band_start = band_origin_row + i; + int band_row_index = j - band_start; + return (band_row_index >= 0 && band_row_index < band_width) ? cells[i * band_width + band_row_index] : invalid_score; +} + +SequenceOverlap Overlapper::extendMatch(const std::string& s1, const std::string& s2, + int start_1, int start_2, int band_width) +{ + SequenceOverlap output; + int num_columns = s1.size() + 1; + int num_rows = s2.size() + 1; + + const int MATCH_SCORE = 2; + const int GAP_PENALTY = -5; + const int MISMATCH_PENALTY = -3; + + // Calculate the number of cells off the diagonal to compute + int half_width = band_width / 2; + band_width = half_width * 2 + 1; // the total number of cells per band + + // Calculate the number of columns that we need to extend to for s1 + size_t num_cells_required = num_columns * band_width; + + // Allocate bands with uninitialized scores + int INVALID_SCORE = std::numeric_limits::min(); + DPCells cells(num_cells_required, 0); + + // Calculate the band center coordinates in the first + // column of the multiple alignment. These are calculated by + // projecting the match diagonal onto the first column. It is possible + // that these are negative. 
+ int band_center = start_2 - start_1 + 1; + int band_origin = band_center - (half_width + 1); +#ifdef DEBUG_EXTEND + printf("Match start: [%d %d]\n", start_1, start_2); + printf("Band center, origin: [%d %d]\n", band_center, band_origin); + printf("Num cells: %zu\n", cells.size()); +#endif + + // Fill in the bands column by column + for(int i = 1; i < num_columns; ++i) { + int j = band_origin + i; // start row of this band + int end_row = j + band_width; + + // Trim band coordinates to only compute valid positions + if(j < 1) + j = 1; + if(end_row > num_rows) + end_row = num_rows; + + if(end_row <= 0 || j >= num_rows || j >= end_row) + continue; // nothing to do for this column + +#ifdef DEBUG_EXTEND + printf("Filling column %d rows [%d %d]\n", i, j, end_row); +#endif + + // Fill in this band. To avoid the need to perform many tests whether a particular cell + // is stored in a band, we do some of the calculations outside of the main loop below. + // We first calculate the score for the first cell in the band. This calculation cannot + // involve the cell above the first row so we ignore it below. We then fill in the main + // part of the band, which can perform valid reads from all its neighboring cells. Finally + // we calculate the last row, which does not use the cell to its left. + + // Set up initial indices and scores + int curr_idx = _getBandedCellIndex(i, j, band_width, band_origin); + int left_idx = _getBandedCellIndex(i - 1, j, band_width, band_origin); + int diagonal_idx = _getBandedCellIndex(i - 1, j - 1, band_width, band_origin); + int diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + int left_score = left_idx != -1 ? 
cells[left_idx] + GAP_PENALTY : INVALID_SCORE; + int up_score = 0; + + // Set the first row score + cells[curr_idx] = std::max(left_score, diagonal_score); + +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); + assert(diagonal_idx != -1); +#endif + + // Update indices + curr_idx += 1; + left_idx += 1; + diagonal_idx += 1; + j += 1; + + // Fill in the main part of the band, stopping before the last row + while(j < end_row - 1) { + +#ifdef DEBUG_EXTEND + assert(diagonal_idx == _getBandedCellIndex(i - 1, j - 1, band_width, band_origin)); + assert(left_idx == _getBandedCellIndex(i - 1, j, band_width, band_origin)); + assert(curr_idx - 1 == _getBandedCellIndex(i, j - 1, band_width, band_origin)); +#endif + + diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + left_score = cells[left_idx] + GAP_PENALTY; + up_score = cells[curr_idx - 1] + GAP_PENALTY; + cells[curr_idx] = max3(diagonal_score, left_score, up_score); + +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); +#endif + // Update indices + curr_idx += 1; + left_idx += 1; + diagonal_idx += 1; + j += 1; + } + + // Fill in last row, here we ignore the left cell which is now out of band + if(j != end_row) { + diagonal_score = cells[diagonal_idx] + (s1[i - 1] == s2[j - 1] ? MATCH_SCORE : MISMATCH_PENALTY); + up_score = cells[curr_idx - 1] + GAP_PENALTY; + cells[curr_idx] = std::max(diagonal_score, up_score); +#ifdef DEBUG_EXTEND + printf("Filled [%d %d] = %d\n", i , j, cells[curr_idx]); + assert(_getBandedCellIndex(i,j, band_width, band_origin) != -1); +#endif + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. 
We start the backtracking from + // that cell + int max_row_value = std::numeric_limits::min(); + int max_column_value = std::numeric_limits::min(); + size_t max_row_index = 0; + size_t max_column_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(int i = 1; i < num_columns; ++i) { + int v = _getBandedCellScore(cells, i, num_rows - 1, band_width, band_origin, INVALID_SCORE); + if(v > max_row_value) { + max_row_value = v; + max_row_index = i; + } + } + + // Check every row of the last column + for(int j = 1; j < num_rows; ++j) { + int v = _getBandedCellScore(cells, num_columns - 1, j, band_width, band_origin, INVALID_SCORE); + if(v > max_column_value) { + max_column_value = v; + max_column_index = j; + } + } + + // Compute the location at which to start the backtrack + size_t i; + size_t j; + + if(max_column_value > max_row_value) { + i = num_columns - 1; + j = max_column_index; + output.score = max_column_value; + } + else { + i = max_row_index; + j = num_rows - 1; + output.score = max_row_value; + } + +#ifdef DEBUG_EXTEND + printf("BEST: %zu %zu\n", i, j); +#endif + + // Backtrack to fill in the cigar string and alignment start position + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_EXTEND + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = _getBandedCellScore(cells, i - 1, j - 1, band_width, band_origin, INVALID_SCORE) + (is_match ? 
MATCH_SCORE : MISMATCH_PENALTY); + int up = _getBandedCellScore(cells, i, j - 1, band_width, band_origin, INVALID_SCORE) + GAP_PENALTY; + int left = _getBandedCellScore(cells, i -1 , j, band_width, band_origin, INVALID_SCORE) + GAP_PENALTY; + int curr = _getBandedCellScore(cells, i, j, band_width, band_origin, INVALID_SCORE); + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(curr == up) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(curr == left) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(curr == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +// The score for this cell coming from a match, deletion and insertion +struct AffineCell +{ + AffineCell() : G(0), I(-std::numeric_limits::max()), D(-std::numeric_limits::max()) {} + + // + int G; + int I; + int D; +}; + +typedef std::vector AffineCells; +typedef std::vector AffineMatrix; + +SequenceOverlap Overlapper::computeOverlapAffine(const std::string& s1, const std::string& s2, const OverlapperParams params) +{ + // Exit with invalid intervals if either string is zero length + SequenceOverlap output; + if(s1.empty() || s2.empty()) { + std::cerr << "Overlapper::computeOverlap error: empty input sequence\n"; + exit(EXIT_FAILURE); + } + + // Initialize the scoring matrix + size_t num_columns = s1.size() + 1; + size_t num_rows = s2.size() + 1; 
+ + int gap_open = 5; + int gap_ext = 2; + + AffineMatrix score_matrix; + score_matrix.resize(num_columns); + for(size_t i = 0; i < score_matrix.size(); ++i) + score_matrix[i].resize(num_rows); + + // Calculate scores + for(size_t i = 1; i < num_columns; ++i) { + for(size_t j = 1; j < num_rows; ++j) { + + // Calculate the score for entry (i,j) + int idx_1 = i - 1; + int idx_2 = j - 1; + + int diagonal = score_matrix[i-1][j-1].G + (s1[idx_1] == s2[idx_2] ? params.match_score : params.mismatch_penalty); + + // When computing the score starting from the left/right cells, we have to determine + // whether to extend an existing gap or start a new one. + AffineCell& curr = score_matrix[i][j]; + + AffineCell& up = score_matrix[i][j-1]; + if(up.I > up.G - gap_open) + curr.I = up.I - gap_ext; + else + curr.I = up.G - (gap_open + gap_ext); + + AffineCell& left = score_matrix[i-1][j]; + if(left.D > left.G - gap_open) + curr.D = left.D - gap_ext; + else + curr.D = left.G - (gap_open + gap_ext); + + curr.G = max3(curr.D, curr.I, diagonal); + } + } + + // The location of the highest scoring match in the + // last row or last column is the maximum scoring overlap + // for the pair of strings. 
We start the backtracking from + // that cell + int max_row_value = std::numeric_limits::min(); + int max_column_value = std::numeric_limits::min(); + size_t max_row_index = 0; + size_t max_column_index = 0; + + // Check every column of the last row + // The first column is skipped to avoid empty alignments + for(size_t i = 1; i < num_columns; ++i) { + int v = score_matrix[i][num_rows - 1].G; + if(v > max_row_value) { + max_row_value = v; + max_row_index = i; + } + } + + // Check every row of the last column + for(size_t j = 1; j < num_rows; ++j) { + int v = score_matrix[num_columns - 1][j].G; + if(v > max_column_value) { + max_column_value = v; + max_column_index = j; + } + } + + // Compute the location at which to start the backtrack + size_t i; + size_t j; + + if(max_column_value > max_row_value) { + i = num_columns - 1; + j = max_column_index; + output.score = max_column_value; + } else { + i = max_row_index; + j = num_rows - 1; + output.score = max_row_value; + } + + // Set the alignment endpoints to be the index of the last aligned base + output.match[0].end = i - 1; + output.match[1].end = j - 1; + output.length[0] = s1.length(); + output.length[1] = s2.length(); +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + std::string cigar; + while(i > 0 && j > 0) { + // Compute the possible previous locations of the path + int idx_1 = i - 1; + int idx_2 = j - 1; + + bool is_match = s1[idx_1] == s2[idx_2]; + int diagonal = score_matrix[i - 1][j - 1].G + (is_match ? 
params.match_score : params.mismatch_penalty); + int up1 = score_matrix[i][j-1].G - (gap_open + gap_ext); + int up2 = score_matrix[i][j-1].I - gap_ext; + + int left1 = score_matrix[i-1][j].G - (gap_open + gap_ext); + int left2 = score_matrix[i-1][j].D - gap_ext; + + int curr = score_matrix[i][j].G; + + // If there are multiple possible paths to this cell + // we break ties in order of insertion,deletion,match + // this helps left-justify matches for homopolymer runs + // of unequal lengths + if(curr == up1 || curr == up2) { + cigar.push_back('I'); + j -= 1; + output.edit_distance += 1; + } else if(curr == left1 || curr == left2) { + cigar.push_back('D'); + i -= 1; + output.edit_distance += 1; + } else { + assert(curr == diagonal); + if(!is_match) + output.edit_distance += 1; + cigar.push_back('M'); + i -= 1; + j -= 1; + } + + output.total_columns += 1; + } + + // Set the alignment startpoints + output.match[0].start = i; + output.match[1].start = j; + + // Compact the expanded cigar string into the canonical run length encoding + // The backtracking produces a cigar string in reversed order, flip it + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + return output; +} + +// Compact an expanded CIGAR string into a regular cigar string +std::string Overlapper::compactCigar(const std::string& ecigar) +{ + if(ecigar.empty()) + return ""; + + std::stringstream compact_cigar; + char curr_symbol = ecigar[0]; + int curr_run = 1; + for(size_t i = 1; i < ecigar.size(); ++i) { + if(ecigar[i] == curr_symbol) { + curr_run += 1; + } else { + compact_cigar << curr_run << curr_symbol; + curr_symbol = ecigar[i]; + curr_run = 1; + } + } + + // Add last symbol/run + compact_cigar << curr_run << curr_symbol; + return compact_cigar.str(); +} + + +SequenceOverlap Overlapper::ageAlignPrefix(const std::string &s1, const std::string &s2, const ScoreParam &score_param) +{ + SequenceOverlap output; + + const int NONE = 0; + const int 
DIAGONAL = 1; + const int VERTICAL = 2; + const int HORIZONTAL = 3; + + int orientation_table[] = {NONE, DIAGONAL, VERTICAL, HORIZONTAL}; + int orientation_table_m[] = {NONE, HORIZONTAL, VERTICAL}; + + output.length[0] = s1.size(); + output.length[1] = s2.size(); + + size_t num_columns = s1.size() + 2; + size_t num_rows = s2.size() + 2; + + DPMatrix S(num_rows, DPCells(num_columns)); + DPMatrix S_backtrace(num_rows, DPCells(num_columns)); + DPMatrix S_lower(num_rows, DPCells(num_columns)); + DPMatrix S_upper(num_rows, DPCells(num_columns)); + + // calculate score matrix + for (size_t i = 1; i < num_rows-1; ++i) { + for (size_t j = 1; j < num_columns-1; ++j) { + S_lower[i][j] = std::max(S_lower[i-1][j] - score_param.gap, S[i-1][j] - score_param.gap_start); + S_upper[i][j] = std::max(S_upper[i][j-1] - score_param.gap, S[i][j-1] - score_param.gap_start); + int middle_scores[] = {0, S[i-1][j-1] + score_param.matchChar(s1[j-1], s2[i-1]), S_lower[i][j], S_upper[i][j]}; + const int N = sizeof(middle_scores) / sizeof(int); + auto max_it = std::max_element(middle_scores, middle_scores + N); + S[i][j] = *max_it; + S_backtrace[i][j] = orientation_table[std::distance(middle_scores, max_it)]; + } + } + + DPMatrix R(num_rows, DPCells(num_columns)); + DPMatrix R_backtrace(num_rows, DPCells(num_columns)); + DPMatrix R_lower(num_rows, DPCells(num_columns)); + DPMatrix R_upper(num_rows, DPCells(num_columns)); + + for (size_t i = num_rows-2; i > 0; --i) { + for (size_t j = num_columns-2; j > 0; --j) { + R_lower[i][j] = std::max(R_lower[i+1][j] - score_param.gap, R[i+1][j] - score_param.gap_start); + R_upper[i][j] = std::max(R_upper[i][j+1] - score_param.gap, R[i][j+1] - score_param.gap_start); + int middle_scores[] = {0, R[i+1][j+1] + score_param.matchChar(s1[j-1], s2[i-1]), R_lower[i][j], R_upper[i][j]}; + const int N = sizeof(middle_scores) / sizeof(int); + auto max_it = std::max_element(middle_scores, middle_scores + N); + R[i][j] = *max_it; + R_backtrace[i][j] = 
orientation_table[std::distance(middle_scores, max_it)]; + } + } + + + DPMatrix M_backtrace(num_rows, DPCells(num_columns)); + + for (size_t i = 1; i < num_rows-1; ++i) { + M_backtrace[i][0] = VERTICAL; + } + + for (size_t j = 1; j < num_columns-1; ++j) { + M_backtrace[0][j] = HORIZONTAL; + } + + // calculate maximum matrix + for (size_t i = 1; i < num_rows-1; ++i) { + for (size_t j = 1; j < num_columns-1; ++j) { + int scores[] = {S[i][j], S[i][j-1], S[i-1][j]}; + const int N = sizeof(scores) / sizeof(int); + auto max_it = std::max_element(scores, scores + N); + S[i][j] = *max_it; + M_backtrace[i][j] = orientation_table_m[std::distance(scores, max_it)]; + } + } + + DPMatrix MR_backtrace(num_rows, DPCells(num_columns)); + + for (size_t i = num_rows-2; i > 0; --i) { + MR_backtrace[i][num_columns-1] = VERTICAL; + } + + for (size_t j = num_columns-2; j > 0; --j) { + MR_backtrace[num_rows-1][j] = HORIZONTAL; + } + + // calculate maximum matrix + for (size_t i = num_rows-2; i > 0; --i) { + for (size_t j = num_columns-2; j > 0; --j) { + int scores[] = {R[i][j], R[i][j+1], R[i+1][j]}; + const int N = sizeof(scores) / sizeof(int); + auto max_it = std::max_element(scores, scores + N); + R[i][j] = *max_it; + MR_backtrace[i][j] = orientation_table_m[std::distance(scores, max_it)]; + } + } + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << S[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << M_backtrace[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << R[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + +// for (size_t i = 0; i < num_rows; ++i) { +// for (size_t j = 0; j < num_columns; ++j) { +// std::cout << 
MR_backtrace[i][j] << ' '; +// } +// std::cout << std::endl; +// } +// std::cout << std::endl; + + int max_score = 0; + int max_row_index = 0; + int max_column_index = 0; + + for (size_t i = 0; i < num_rows-1; ++i) { + for (size_t j = 0; j < num_columns-1; ++j) { + int val = S[i][j] + R[i+1][j+1]; + if (max_score < val) max_score = val; + } + } + + for (size_t i = 0; i < num_rows-1; ++i) { + for (size_t j = 0; j < num_columns-1; ++j) { + if (M_backtrace[i][j] == NONE && S[i][j] + R[i+1][j+1] == max_score) { + max_row_index = i; + max_column_index = j; + goto theEnd; + } + } + } + +theEnd: + output.score = max_score; + output.match[0].end = max_column_index - 1; + output.match[1].end = max_row_index - 1; + +#ifdef DEBUG_OVERLAPPER + printf("Endpoints selected: (%d %d) with score %d\n", output.match[0].end, output.match[1].end, output.score); +#endif + + output.edit_distance = 0; + output.total_columns = 0; + + int i = max_row_index; + int j = max_column_index; + std::string cigar; + + while (S_backtrace[i][j] != NONE && i*j !=0) { + if (S_backtrace[i][j] == VERTICAL) { + cigar.push_back('I'); + output.edit_distance += 1; + i--; + } else if(S_backtrace[i][j] == HORIZONTAL) { + cigar.push_back('D'); + output.edit_distance += 1; + j--; + } else { + if (s1[j-1] != s2[i-1]) { + output.edit_distance += 1; + } + cigar.push_back('M'); + i--; + j--; + } + output.total_columns += 1; + } + + output.match[0].start = j; + output.match[1].start = i; + + std::reverse(cigar.begin(), cigar.end()); + assert(!cigar.empty()); + output.cigar = compactCigar(cigar); + + return output; +} + + +SequenceOverlap Overlapper::ageAlignSuffix(const std::string &s1, const std::string &s2, const ScoreParam &score_param) +{ + std::string s1_r = s1; + std::reverse(s1_r.begin(), s1_r.end()); + std::string s2_r = s2; + std::reverse(s2_r.begin(), s2_r.end()); + + SequenceOverlap output = ageAlignPrefix(s1_r, s2_r, score_param); + + output.match[0].flipStrand(output.length[0]); + 
output.match[1].flipStrand(output.length[1]); + + return output; +} + + +SequenceOverlap Overlapper::alignPrefix(const std::string &s1, const std::string &s2, const OverlapperParams params) +{ + std::string s1_r = s1; + std::reverse(s1_r.begin(), s1_r.end()); + std::string s2_r = s2; + std::reverse(s2_r.begin(), s2_r.end()); + + SequenceOverlap output = alignSuffix(s1_r, s2_r, params); + + output.match[0].flipStrand(output.length[0]); + output.match[1].flipStrand(output.length[1]); + + return output; +} diff --git a/Thirdparty/overlapper.h b/Thirdparty/overlapper.h new file mode 100644 index 0000000..1036541 --- /dev/null +++ b/Thirdparty/overlapper.h @@ -0,0 +1,196 @@ +//------------------------------------------------------------------------------- +// +// overlapper - Functions to calculate overlaps between pairs of strings +// +// Copyright (C) 2011 Jared Simpson (jared.simpson@gmail.com) +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +// of the Software, and to permit persons to whom the Software is furnished to do +// so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// ------------------------------------------------------------------------------ +#ifndef OVERLAPPER_H +#define OVERLAPPER_H + +#include +#include +#include + +// A start/end coordinate pair representing +// a subsequence. The end coordinate is +// the index of the last base aligned. +struct SequenceInterval +{ + // functions + SequenceInterval(); + + // Check that the interval is valid + bool isValid() const { return start <= end; } + + // Change the interval to represent the same + // set of bases but on the opposite strand. + void flipStrand(int sequence_length) + { + assert(isValid()); + int tmp = sequence_length - start - 1; + start = sequence_length - end - 1; + end = tmp; + assert(isValid()); + } + + // Returns the length of the interval + // The interval must be valid + int length() const + { + assert(isValid()); + return end - start + 1; + } + + // data + int start; + int end; // inclusive +}; + +// Data structure to hold the result of +// an overlap calculation +struct SequenceOverlap +{ + // Functions + SequenceOverlap(); + + // Check that the record is properly formed + bool isValid() const; + + // added by Zhen Zhang + bool isQualified(int minOverlap, double minIdentity) const; + + // Return padded versions of the matching portions of the strings + void makePaddedMatches(const std::string& s1, const std::string& s2, + std::string* p1, std::string* p2) const; + + // Print the alignment with padding characters + void printAlignment(const std::string& s1, const std::string& s2) const; + + // Recalculate the edit distance between the strings using this alignment + int calculateEditDistance(const std::string& s1, const std::string& s2) const; + + // Recalculate the number of columns in the alignment + int calculateTotalColumns() const; + + // Return the percent identity which we define to be + // the number of matching columns divided by the total number of columns + double getPercentIdentity() const; + + // Returns the length of the overlap, defined to 
be the + // number of columns in the alignment + int getOverlapLength() const { return total_columns; } + + // + friend std::ostream& operator<<(std::ostream& out, const SequenceOverlap& overlap); + + // Data + + // The coordinates of the matching portion of each string + // The end coordinate are the index of the last base matched + SequenceInterval match[2]; + + // The length of the input sequences + int length[2]; + + // + int score; + int edit_distance; + int total_columns; + + // The cigar string follows the sam convention with s1 being the "reference": + // I is an insertion into s1 + // D is a deletion from s1 + + // A-C s1 + // AAC s2 + // C: 1M1I1M + // + // ATC s1 + // A-C s2 + // C: 1M1D1M + std::string cigar; + +}; + +struct OverlapperParams +{ + int match_score; + int gap_penalty; + int mismatch_penalty; +}; + +struct ScoreParam +{ + + ScoreParam(int match, int mismatch, int gap, int gap_start = 0) : + match(match), mismatch(mismatch), gap(gap), gap_start(gap_start) { + + } + + int matchChar(char a, char b) const { + if (a == b) return match; + return mismatch; + } + + int match; + int mismatch; + int gap; + int gap_start; + +}; + +// Global variables +extern OverlapperParams default_params; // { 2, -5, -3 }; +extern OverlapperParams ungapped_params; // { 2, -10000, -3 }; +extern OverlapperParams svseq2_params; // { 1, -3, -1 }; + +// +namespace Overlapper +{ + +// Compute the highest-scoring overlap between s1 and s2. +// This is a naive O(M*N) algorithm with a linear gap penalty. 
+SequenceOverlap computeOverlap(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); +SequenceOverlap computeOverlapSG(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +SequenceOverlap alignSuffix(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); +SequenceOverlap alignPrefix(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +SequenceOverlap computeOverlapSW2(const std::string& s1, const std::string& s2, int minOverlap, double minIdentity, const OverlapperParams params = default_params); + +SequenceOverlap ageAlignPrefix(const std::string& s1, const std::string& s2, const ScoreParam& score_param); +SequenceOverlap ageAlignSuffix(const std::string& s1, const std::string& s2, const ScoreParam& score_param); + +// Extend a match between s1 and s2 into a full overlap using banded dynamic programming. +// start_1/start_2 give the starting positions of the current partial alignment. These coordinates +// are used to estimate where the overlap begins. 
The estimated alignment is refined by calculating +// the overlap with banded dynamic programming +SequenceOverlap extendMatch(const std::string& s1, const std::string& s2, int start_1, int start_2, int bandwidth); + +// Perform an alignment using affine gap penalties +SequenceOverlap computeOverlapAffine(const std::string& s1, const std::string& s2, const OverlapperParams params = default_params); + +// Compact an expanded CIGAR string into a regular cigar string +std::string compactCigar(const std::string& ecigar); + +} + +#endif diff --git a/clip.cpp b/clip.cpp new file mode 100644 index 0000000..05dd9c1 --- /dev/null +++ b/clip.cpp @@ -0,0 +1,620 @@ +#include "clip.h" +#include "error.h" +#include "Helper.h" +#include +#include +#include +#include + +using namespace std; +using namespace BamTools; + +AbstractClip::AbstractClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const vector& cigar) + : referenceId(referenceId), + mapPosition(mapPosition), + clipPosition(clipPosition), + matePosition(matePosition), + sequence(sequence), + cigar(cigar), + conflictFlag(false) { +} + +int AbstractClip::length() const { + return sequence.length(); +} + +int AbstractClip::leftmostPosition() const { + if (cigar[0].Type == 'S') return mapPosition - cigar[0].Length; + return mapPosition; +} + +AbstractClip::~AbstractClip() { +} + +Deletion AbstractClip::call(BamReader &reader, FaidxWrapper &faidx, int insLength, int minOverlap, double minIdentity, int minMapQual) +{ + string refName = Helper::getReferenceName(reader, referenceId); + + vector ranges; + fetchSpanningRanges(reader, insLength, ranges, minMapQual); +// vector sizes; +// fecthSizesForSpanningPairs(reader, insLength, sizes); + + if (ranges.empty()) error("No deletion is found"); + + vector regions; + toTargetRegions(refName, insLength, ranges, regions); + + return call(faidx, regions, minOverlap, minIdentity); +} + +bool AbstractClip::hasConflictWith(AbstractClip 
*other) { + if (getType() == other->getType()) return false; + return abs(clipPosition - other->clipPosition) < Helper::CONFLICT_THRESHOLD; +} + +bool AbstractClip::getConflictFlag() const +{ + return conflictFlag; +} + +void AbstractClip::setConflictFlag(bool value) +{ + conflictFlag = value; +} + +int AbstractClip::maxEditDistanceForSoftclippedPart() +{ + if (lengthOfSoftclippedPart() >= 20) return 2; + return 1; +} + + + +ForwardBClip::ForwardBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const vector& cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +/* +Deletion ForwardBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) { + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + if ((*it).length() < lengthOfSoftclippedPart()) continue; + string s1 = (*it).sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSG(s1, softclippedPart()); + if (overlap.edit_distance > maxEditDistanceForSoftclippedPart()) continue; + int leftEnd = (*it).start + overlap.match[0].end; + int offsetToRight = offsetFromThatEnd((*it).referenceName, faidx, leftEnd); + int rightEnd = clipPosition; + int offsetToLeft = offsetFromThisEnd((*it).referenceName, faidx); + int start1 = leftEnd - offsetToLeft; + int start2 = leftEnd + offsetToRight; + int end1 = rightEnd - offsetToLeft; + int end2 = rightEnd + offsetToRight; + int len = start1 - end1 + 1; + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} +*/ + +Deletion ForwardBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ +// error("No deletion is found."); + ScoreParam score_param(1, -1, 2, 4); + for (auto it = regions.begin(); it != regions.end(); ++it) { + string s1 = (*it).sequence(faidx); + 
reverse(s1.begin(), s1.end()); + string s2 = sequence; + reverse(s2.begin(), s2.end()); + + SequenceOverlap overlap; + + try { + overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); + } catch (ErrorException& ex) { + continue; + } + + for (size_t i = 0; i < 2; ++i) + overlap.match[i].flipStrand(overlap.length[i]); + +// overlap = Overlapper::alignPrefix(s1, s2, ungapped_params); + +// if (s2 == "GCCTACAGAGTGCAGAGCCAGCCCAGGACAGGGGACAATTACACAGGCGATGGTCCTAAGAACCGAACCTTCCAATCCCAAAACTCTAGACAGGTATCCAA") +// cout << s1 << endl; +// overlap = Overlapper::ageAlignPrefix(s1, s2, score_param); +// if (!overlap.isQualified(minOverlap, minIdentity)) +// continue; + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int rightBp = clipPosition + offset; + int leftBp = (*it).start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + +// int delta = overlap.match[1].length() - lengthOfSoftclippedPart(); +// int rightBp = clipPosition; +// // leftBp might need to be adjusted. +// int leftBp = (*it).start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp : leftBp + delta; + int start2 = delta > 0 ? leftBp + delta : leftBp; + int end1 = delta > 0 ? rightBp : rightBp + delta; + int end2 = delta > 0 ? 
rightBp + delta : rightBp; + +// if (start2 == 23483811) { +// cout << overlap << endl; +// cout << s2 << endl; +// cout << s1 << endl; +// } + + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} + +string ForwardBClip::getType() +{ + return "5F"; +} + +void ForwardBClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + // SVSeq2.length +// int start = leftmostPosition(); + int start = clipPosition; +// int end = start + insLength + length(); + int end = start + insLength - 2 * length(); + + if (start > end) error("the region is invalid."); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + BamAlignment al; + while(reader.GetNextAlignment(al)) { +// string xt; +// al.GetTag("XT", xt); +// xt = xt.substr(0,1); + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality >= minMapQual //&& xt == "U" + && al.Position > al.MatePosition && al.MatePosition + length() - Helper::SVLEN_THRESHOLD <= clipPosition) { + ranges.push_back({al.MatePosition + 1, al.Position + 1}); + } + } + +} + +void ForwardBClip::fecthSizesForSpanningPairs(BamReader &reader, int insLength, std::vector &sizes) +{ + int start = clipPosition; + int end = clipPosition + insLength + length(); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + vector > records; + BamAlignment al; + while(reader.GetNextAlignment(al)) { + if (al.IsReverseStrand() && !al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality > 0 && al.Position > al.MatePosition) { + records.push_back(make_pair(abs(al.InsertSize), al.Position - clipPosition)); + } + } + sort(records.begin(), records.end(), [](const pair& r1, const pair& r2){ return r1.first < r2.first; }); + cout << ">" << clipPosition 
<< "," << mapPosition << endl; + transform(records.begin(), records.end(), ostream_iterator(cout, " "), [](const pair& r){ + stringstream ss; + ss << "(" << r.first << "," << r.second << ")"; + return ss.str(); + }); + cout << endl; +} + +void ForwardBClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int rightmostPos = clipPosition + length(); + + std::vector newRanges(ranges.size()); +// transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.start, ran.start + insLength + length()}; return r; }); + transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.start, ran.start + insLength - length()}; return r; }); + std::vector idClusters; + clusterRanges(newRanges, idClusters); +// Replace with the merging method used by SVSeq2 +// sort(std::begin(newRanges), std::end(newRanges)); +// clusterRanges2(newRanges, idClusters); + for (auto &elt : idClusters) { + int s = newRanges[elt.front()].start; + if (s > rightmostPos) break; + int e = newRanges[elt.back()].end; + if (e > rightmostPos) e = rightmostPos; + if (s > e) break; + regions.push_back({referenceName, s, e}); + } +} + + +/* + +ForwardEClip::ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +void ForwardEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges) +{ + ranges.push_back({clipPosition, matePosition}); +} + +void ForwardEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int pe = ranges[0].end + length(); + int leftmostPos = clipPosition; + int len1 = length() - cigar[cigar.size() - 1].Length; + int cPrime = ranges[0].end + len1 - insLength; + if (cPrime < leftmostPos) cPrime = leftmostPos; + 
regions.push_back({referenceName, cPrime, pe}); +} + +Deletion ForwardEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSW(s1, sequence, minOverlap, minIdentity, ungapped_params); + if (overlap.getOverlapLength() >= minOverlap && + overlap.getPercentIdentity() >= minIdentity * 100) { + int rightBp = regions[0].start + overlap.match[0].start - 1; + int leftBp = (overlap.getOverlapLength() > cigar[cigar.size() - 1].Length) ? clipPosition - overlap.getOverlapLength() + cigar[cigar.size() - 1].Length + : clipPosition; + leftBp--; // left breakpoint refers the position of the last base prior to the clipped part conforming to the VCF format. + int len = leftBp - rightBp; + if (overlap.getOverlapLength() < cigar[cigar.size() - 1].Length) len += cigar[cigar.size() - 1].Length - overlap.getOverlapLength(); + if (len > Helper::SVLEN_THRESHOLD) error("No deletion was found."); + return Deletion(regions[0].referenceName, leftBp, leftBp, rightBp, rightBp, len); + } + error("No deletion was found."); +} +*/ + + +ReverseEClip::ReverseEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +void ReverseEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + // Experiment ID: SVSeq2.length + int start = clipPosition - insLength + length(); +// int end = leftmostPosition() + length(); +// int start = end - insLength - length(); + if (start < 0) start = 0; + int end = clipPosition - length(); + + if (start > end) error("the region is invalid."); + + if (!reader.SetRegion(referenceId, start - 1, referenceId, end)) + error("Could not set the region."); + + BamAlignment al; + while(reader.GetNextAlignment(al)) { +// string xt; +// 
al.GetTag("XT", xt); +// xt = xt.substr(0,1); + if (al.Position < start - 1) continue; + if (!al.IsReverseStrand() && al.IsMateReverseStrand() && al.RefID == al.MateRefID + && al.MapQuality >= minMapQual //&& xt == "U" + && al.Position < al.MatePosition && al.MatePosition >= clipPosition - Helper::SVLEN_THRESHOLD) { + ranges.push_back({al.Position + 1, al.MatePosition + 1}); + } + } + +} + +void ReverseEClip::fecthSizesForSpanningPairs(BamReader &reader, int insLength, std::vector &sizes) +{ + +} + +void ReverseEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + int leftmostPos = clipPosition - length(); + + std::vector newRanges(ranges.size()); +// transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.end - insLength - length(), ran.end}; return r; }); + transform(ranges.begin(), ranges.end(), newRanges.begin(), [=](const IRange &ran) { IRange r = {ran.end - insLength + 2 * length(), ran.end + length()}; return r; }); + std::vector idClusters; + clusterRanges(newRanges, idClusters); +// Replace with the merging method used by SVSeq2 +// sort(std::begin(newRanges), std::end(newRanges)); +// clusterRanges2(newRanges, idClusters); + for (auto &elt : idClusters) { + int e = newRanges[elt.back()].end; + if (e < leftmostPos) continue; + int s = newRanges[elt.front()].start; + if (s < leftmostPos) s = leftmostPos; + if (s > e) continue; + regions.push_back({referenceName, s, e}); + } + +} + +/* +Deletion ReverseEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) { + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + if ((*it).length() < lengthOfSoftclippedPart()) continue; + string s1 = (*it).sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSG(s1, softclippedPart()); + if (overlap.edit_distance > maxEditDistanceForSoftclippedPart()) continue; + int leftEnd = clipPosition - 1; + int 
offsetToRight = offsetFromThisEnd((*it).referenceName, faidx); + int rightEnd = (*it).start + overlap.match[0].start; + int offsetToLeft = offsetFromThatEnd((*it).referenceName, faidx, rightEnd); + int start1 = leftEnd - offsetToLeft; + int start2 = leftEnd + offsetToRight; + int end1 = rightEnd - offsetToLeft; + int end2 = rightEnd + offsetToRight; + int len = start1 - end1 + 1; + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} +*/ + +Deletion ReverseEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ +// error("No deletion is found."); + ScoreParam score_param(1, -1, 2, 4); + + for (auto it = regions.rbegin(); it != regions.rend(); ++it) { + string s1 = (*it).sequence(faidx); + string s2 = sequence; + SequenceOverlap overlap; + + try { + overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); + } catch (ErrorException& ex) { + continue; + } + +// overlap = Overlapper::alignSuffix(s1, s2, ungapped_params); +// overlap = Overlapper::ageAlignSuffix(s1, s2, score_param); +// if (!overlap.isQualified(minOverlap, minIdentity)) +// continue; + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int leftBp = clipPosition - 1 - offset; + int rightBp = (*it).start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + +// int delta = overlap.match[1].length() - lengthOfSoftclippedPart(); +// int leftBp = clipPosition - 1; +// // rightBp might need to be adjusted +// int rightBp = (*it).start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp - delta : leftBp; + int start2 = delta > 0 ? leftBp : leftBp - delta; + int end1 = delta > 0 ? 
rightBp - delta : rightBp; + int end2 = delta > 0 ? rightBp : rightBp - delta; + +// if (start2 == 54151129) { +// cout << overlap << endl; +// cout << s2 << endl; +// cout << s1 << endl; +// } + + if (len > Helper::SVLEN_THRESHOLD) continue; + return Deletion((*it).referenceName, start1, start2, end1, end2, len, getType()); + } + error("No deletion is found."); +} + +string ReverseEClip::getType() +{ + return "5R"; +} + + +int ForwardBClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ + return numOfThelongestSuffix(softclippedPart(), + faidx.fetch(referenceName, + clipPosition - lengthOfSoftclippedPart(), + clipPosition - 1)); +} + +int ForwardBClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ + return numOfTheLongestPrefix(mappedPart(), + faidx.fetch(referenceName, + orignal + 1, + orignal + lengthOfMappedPart())); +} + +int ReverseEClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ + return numOfTheLongestPrefix(softclippedPart(), + faidx.fetch(referenceName, + clipPosition, + clipPosition + lengthOfSoftclippedPart() - 1)); +} + +int ReverseEClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ + return numOfThelongestSuffix(mappedPart(), + faidx.fetch(referenceName, + orignal - lengthOfMappedPart(), + orignal - 1)); +} + + +ReverseBClip::ReverseBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +string ReverseBClip::getType() +{ + return "3R"; +} + +Deletion ReverseBClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + reverse(s1.begin(), s1.end()); + string s2 = sequence; + reverse(s2.begin(), s2.end()); + SequenceOverlap overlap = Overlapper::computeOverlapSW2(s1, s2, minOverlap, minIdentity, ungapped_params); 
+ + for (size_t i = 0; i < 2; ++i) + overlap.match[i].flipStrand(overlap.length[i]); + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int rightBp = clipPosition + offset; + int leftBp = regions[0].start + overlap.match[0].start + lengthOfSoftclippedPart() - 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp : leftBp + delta; + int start2 = delta > 0 ? leftBp + delta : leftBp; + int end1 = delta > 0 ? rightBp : rightBp + delta; + int end2 = delta > 0 ? rightBp + delta : rightBp; + if (len > Helper::SVLEN_THRESHOLD) error("No deletion is found."); + return Deletion(regions[0].referenceName, start1, start2, end1, end2, len, getType()); + + error("No deletion is found."); +} + +void ReverseBClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + ranges.push_back({matePosition + 1, clipPosition + 1}); +} + +void ReverseBClip::fecthSizesForSpanningPairs(BamReader &reader, int inslength, std::vector &sizes) +{ +} + +void ReverseBClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + // ran.start, ran.start + insLength - length() + int rightmostPos = clipPosition + length(); + int s = ranges[0].start; + int e = s + insLength - length(); + if (e > rightmostPos) e = rightmostPos; + if (s > e) return; + regions.push_back({referenceName, s, e}); +} + +int ReverseBClip::lengthOfSoftclippedPart() +{ + return cigar[0].Length; +} + +string ReverseBClip::softclippedPart() +{ + return sequence.substr(0, lengthOfSoftclippedPart()); +} + +string ReverseBClip::mappedPart() +{ + return sequence.substr(lengthOfSoftclippedPart()); +} + +int ReverseBClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ +} + +int ReverseBClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int 
orignal) +{ +} + +ForwardEClip::ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const string &sequence, const std::vector &cigar) + : AbstractClip(referenceId, mapPosition, clipPosition, matePosition, sequence, cigar) { +} + +string ForwardEClip::getType() +{ + return "3F"; +} + +Deletion ForwardEClip::call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity) +{ + string s1 = regions[0].sequence(faidx); + SequenceOverlap overlap = Overlapper::computeOverlapSW2(s1, sequence, minOverlap, minIdentity, ungapped_params); + + int delta = overlap.getOverlapLength() - lengthOfSoftclippedPart(); + int offset = 0; + for (auto &ci: cigar) { + if (ci.Type == 'D') offset += ci.Length; + else if (ci.Type == 'I') offset -= ci.Length; + } + int leftBp = clipPosition - 1 - offset; + int rightBp = regions[0].start + overlap.match[0].end - lengthOfSoftclippedPart() + 1; + + int len = leftBp - rightBp + 1; + int start1 = delta > 0 ? leftBp - delta : leftBp; + int start2 = delta > 0 ? leftBp : leftBp - delta; + int end1 = delta > 0 ? rightBp - delta : rightBp; + int end2 = delta > 0 ? 
rightBp : rightBp - delta; + + if (len <= Helper::SVLEN_THRESHOLD) { + return Deletion(regions[0].referenceName, start1, start2, end1, end2, len, getType()); + } + + error("No deletion is found."); +} + +void ForwardEClip::fetchSpanningRanges(BamReader &reader, int insLength, std::vector &ranges, int minMapQual) +{ + ranges.push_back({clipPosition + 1, matePosition + 1}); +} + +void ForwardEClip::fecthSizesForSpanningPairs(BamReader &reader, int inslength, std::vector &sizes) +{ +} + +void ForwardEClip::toTargetRegions(const string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) +{ + // ran.end - insLength + 2 * length(), ran.end + length() + int leftmostPos = clipPosition; + int s = ranges[0].end - insLength + 2 * length(); + if (s < leftmostPos) s = leftmostPos; + int e = ranges[0].end + length(); + if (s > e) return; + regions.push_back({referenceName, s, e}); +} + +int ForwardEClip::lengthOfSoftclippedPart() +{ + return cigar[cigar.size() - 1].Length; +} + +string ForwardEClip::softclippedPart() +{ + return sequence.substr(lengthOfMappedPart()); +} + +string ForwardEClip::mappedPart() +{ + return sequence.substr(0, lengthOfMappedPart()); +} + +int ForwardEClip::offsetFromThisEnd(string referenceName, FaidxWrapper &faidx) +{ +} + +int ForwardEClip::offsetFromThatEnd(string referenceName, FaidxWrapper &faidx, int orignal) +{ +} diff --git a/clip.h b/clip.h new file mode 100644 index 0000000..5af136e --- /dev/null +++ b/clip.h @@ -0,0 +1,191 @@ +#ifndef CLIP_H +#define CLIP_H + +#include "api/BamAux.h" +#include "api/BamReader.h" +#include "Deletion.h" +#include "FaidxWrapper.h" +#include "range.h" +#include "Thirdparty/overlapper.h" + +#include +#include +#include + +struct TargetRegion +{ + std::string referenceName; + int start; + int end; + + std::string sequence(FaidxWrapper &faidx) const { + return faidx.fetch(referenceName, start, end); + } + + int length() const { + return end - start + 1; + } +}; + + +class AbstractClip { +public: + 
AbstractClip(int referenceId, int mapPosition, int clipPosition, + int matePosition, const std::string& sequence, + const std::vector& cigar); + + int length() const; + + int leftmostPosition() const; + int getClipPosition() const { + return clipPosition; + } + + virtual ~AbstractClip(); + + Deletion call(BamTools::BamReader& reader, FaidxWrapper &faidx, int insLength, int minOverlap, double minIdentity, int minMapQual); + + bool hasConflictWith(AbstractClip *other); + virtual std::string getType() = 0; + bool getConflictFlag() const; + void setConflictFlag(bool value); + std::string toString() { + std::stringstream ss; + ss << getClipPosition() << "\t" << getType(); + return ss.str(); + } + +protected: + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity) = 0; + virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual) = 0; + virtual void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector& sizes) = 0; + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions) = 0; + + virtual int lengthOfSoftclippedPart() = 0; + int lengthOfMappedPart() { + return sequence.size() - lengthOfSoftclippedPart(); + } + + int maxEditDistanceForSoftclippedPart(); + + virtual std::string softclippedPart() = 0; + virtual std::string mappedPart() = 0; + virtual int offsetFromThisEnd(std::string referenceName, FaidxWrapper& faidx) = 0; + virtual int offsetFromThatEnd(std::string referenceName, FaidxWrapper& faidx, int orignal) = 0; + + int referenceId; + int mapPosition; + int clipPosition; + int matePosition; + std::string sequence; + std::vector cigar; + + bool conflictFlag; +}; + +class ForwardBClip : public AbstractClip { +public: + ForwardBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + +private: + 
virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + virtual void fecthSizesForSpanningPairs(BamTools::BamReader& reader, int insLength, std::vector& sizes); + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity); + + // AbstractClip interface +public: + std::string getType(); + + // AbstractClip interface +protected: + int lengthOfSoftclippedPart() { + return cigar[0].Length; + } + + std::string softclippedPart() { + return sequence.substr(0, lengthOfSoftclippedPart()); + } + + std::string mappedPart() { + return sequence.substr(lengthOfSoftclippedPart()); + } + + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ReverseBClip : public AbstractClip { + + // AbstractClip interface +public: + ReverseBClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + std::string getType(); + +protected: + Deletion call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity); + void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector &sizes); + void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + int lengthOfSoftclippedPart(); + std::string softclippedPart(); + std::string mappedPart(); + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ForwardEClip : public AbstractClip { + + // AbstractClip interface 
+public: + ForwardEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + std::string getType(); + +protected: + Deletion call(FaidxWrapper &faidx, const std::vector ®ions, int minOverlap, double minIdentity); + void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + void fecthSizesForSpanningPairs(BamTools::BamReader &reader, int inslength, std::vector &sizes); + void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + int lengthOfSoftclippedPart(); + std::string softclippedPart(); + std::string mappedPart(); + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +class ReverseEClip : public AbstractClip { +public: + ReverseEClip(int referenceId, int mapPosition, int clipPosition, int matePosition, const std::string& sequence, const std::vector& cigar); + +private: + virtual void fetchSpanningRanges(BamTools::BamReader &reader, int insLength, std::vector &ranges, int minMapQual); + virtual void fecthSizesForSpanningPairs(BamTools::BamReader& reader, int insLength, std::vector& sizes); + virtual void toTargetRegions(const std::string &referenceName, int insLength, std::vector &ranges, std::vector ®ions); + + virtual Deletion call(FaidxWrapper &faidx, const std::vector& regions, int minOverlap, double minIdentity); + + // AbstractClip interface +public: + std::string getType(); + + // AbstractClip interface +protected: + int lengthOfSoftclippedPart() { + return cigar[cigar.size() - 1].Length; + } + + std::string softclippedPart() { + return sequence.substr(lengthOfMappedPart()); + } + + std::string mappedPart() { + return sequence.substr(0, lengthOfMappedPart()); + } + + int offsetFromThisEnd(std::string referenceName, FaidxWrapper &faidx); + int 
offsetFromThatEnd(std::string referenceName, FaidxWrapper &faidx, int orignal); +}; + +#endif // CLIP_H diff --git a/easylogging++.h b/easylogging++.h new file mode 100644 index 0000000..2c4e893 --- /dev/null +++ b/easylogging++.h @@ -0,0 +1,4003 @@ +/////////////////////////////////////////////////////////////////////////////////// +// // +// easylogging++.h - Core of EasyLogging++ // +// // +// EasyLogging++ v8.91 // +// Cross platform logging made easy for C++ applications // +// Author Majid Khan // +// http://www.icplusplus.com/tools/easylogging // +// https://github.com/mkhan3189/EasyLoggingPP // +// // +// Copyright (c) 2012-2013 Majid Khan // +// // +// This software is provided 'as-is', without any express or implied // +// warranty. In no event will the authors be held liable for any damages // +// arising from the use of this software. // +// // +// Permission is granted to anyone to use this software for any purpose, // +// including commercial applications, and to alter it and redistribute // +// it freely, subject to the following restrictions: // +// // +// 1. The origin of this software must not be misrepresented; you must // +// not claim that you wrote the original software. If you use this // +// software in a product, an acknowledgment in the product documentation // +// would be appreciated but is not required. // +// // +// 2. Altered source versions must be plainly marked as such, and must // +// not be misrepresented as being the original software. // +// // +// 3. This notice may not be removed or altered from any source // +// distribution // +// // +// PLEASE NOTE: THIS FILE MAY BE CHANGED. 
TO GET ORIGINAL VERSION // +// EITHER DOWNLOAD IT FROM http://www.icplusplus.com/tools/easylogging/ // +// OR PULL IT FROM https://github.com/mkhan3189/EasyLoggingPP (master branch) // +// // +/////////////////////////////////////////////////////////////////////////////////// + +#ifndef EASYLOGGINGPP_H +#define EASYLOGGINGPP_H +// +// Log location macros +// +#if !defined(__FILE__) +# define __FILE__ "" +#endif // !defined(__FILE__) +#if !defined(__LINE__) +# define __LINE__ 0 +#endif // !defined(__LINE__) +// Appropriate function macro +#if defined(__func__) +# undef __func__ +#endif // defined(__func__) +#if defined(_MSC_VER) && (_MSC_VER >= 1020) +# define __func__ __FUNCSIG__ +#elif defined(__GNUC__) && (__GNUC__ >= 2) +# define __func__ __PRETTY_FUNCTION__ +#elif defined(__clang__) && (__clang__ == 1) +# define __func__ __PRETTY_FUNCTION__ +#else +# define __func__ "" +#endif // defined(_MSC_VER) && (_MSC_VER >= 1020) +// +// Compiler evaluation +// http://isocpp.org/blog/2013/05/gcc-4.8.1-released-c11-feature-complete +// http://msdn.microsoft.com/en-us/library/vstudio/hh567368.aspx +// +// GNU +#if defined(__GNUC__) +# define _ELPP_GCC_VERSION (__GNUC__ * 10000 \ + + __GNUC_MINOR__ * 100 \ + + __GNUC_PATCHLEVEL__) +# if defined(__GXX_EXPERIMENTAL_CXX0X__) +# define _ELPP_CXX0X 1 +# elif (_ELPP_GCC_VERSION >= 40801) +# define _ELPP_CXX11 1 +# endif // defined(__GXX_EXPERIMENTAL_CXX0X__) +#endif // defined(__GNUC__) +// VC++ +#if defined(_MSC_VER) +# if (_MSC_VER >= 1400) // VC++ 8.0 +# define _ELPP_CRT_DBG_WARNINGS 1 +# else +# define _ELPP_CRT_DBG_WARNINGS 0 +# endif // (_MSC_VER >= 1400) +# if (_MSC_VER == 1600) +# define _ELPP_CXX0X 1 +# elif (_MSC_VER == 1700) +# define _ELPP_CXX11 1 +# endif // (_MSC_VER == 1600) +#else +# define _ELPP_CRT_DBG_WARNINGS 0 +#endif // defined(_MSC_VER) +// Clang +#if defined(__clang__) && (__clang__ == 1) +# define _ELPP_CLANG_VERSION (__clang_major__ * 10000 \ + + __clang_minor__ * 100 \ + + __clang_patchlevel__) +# if 
(_ELPP_CLANG_VERSION >= 30300) +# define _ELPP_CXX11 1 +# endif // (_ELPP_CLANG_VERSION >= 30300) +#endif // defined(__clang__) && (__clang__ == 1) +// MinGW +#if defined(__MINGW32__) || defined(__MINGW64__) +# define _ELPP_MINGW 1 +#else +# define _ELPP_MINGW 0 +#endif // defined(__MINGW32__) || defined(__MINGW64__) +#if defined(__ANDROID__) +# define _ELPP_NDK 1 +#else +# define _ELPP_NDK 0 +#endif // defined(__ANDROID__) +// Some special functions that are special for VC++ +// This is to prevent CRT security warnings and to override deprecated methods but at the same time +// MinGW does not support some functions, so we need to make sure that proper function is used. +#if _ELPP_CRT_DBG_WARNINGS +# define SPRINTF sprintf_s +# define STRTOK(a,b,c) strtok_s(a,b,c) +#else +# define SPRINTF sprintf +# define STRTOK(a,b,c) strtok(a,b) +#endif +// std::thread availablity +#if defined(__GNUC__) && (!_ELPP_NDK) && (_ELPP_CXX0X || _ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#elif defined(_MSC_VER) && (!_ELPP_NDK) && (_ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#elif defined(__clang__) && (!_ELPP_NDK) && (__clang__ == 1) && (_ELPP_CXX11) +# define _ELPP_STD_THREAD_AVAILABLE 1 +#else +# define _ELPP_STD_THREAD_AVAILABLE 0 +#endif // defined(__GNUC__) && (_ELPP_CXX0X || _ELPP_CXX11) +// Qt +#if defined(QT_CORE_LIB) +# if (defined(QT_VERSION) && QT_VERSION >= QT_VERSION_CHECK(5, 0, 0)) +# define _ELPP_QT_5 1 +# else +# define _ELPP_QT_5 0 +# endif // (defined(QT_VERSION) && QT_VERSION >= QT_VERSION_CHECK(5, 0, 0)) +#endif // defined(QT_CORE_LIB) +// +// High-level log evaluation +// +#if (defined(_DISABLE_LOGS)) +# define _ENABLE_EASYLOGGING 0 +#else +# define _ENABLE_EASYLOGGING 1 +#endif // (!defined(_DISABLE_LOGS)) +// +// OS evaluation +// +// Windows +#if defined(_WIN32) || defined(_WIN64) +# define _ELPP_OS_WINDOWS 1 +#else +# define _ELPP_OS_WINDOWS 0 +#endif // defined(_WIN32) || defined(_WIN64) +// Linux +#if (defined(__linux) || defined(__linux__)) 
+# define _ELPP_OS_LINUX 1 +#else +# define _ELPP_OS_LINUX 0 +#endif // (defined(__linux) || defined(__linux__)) +// Mac +#if defined(__APPLE__) +# define _ELPP_OS_MAC 1 +#else +# define _ELPP_OS_MAC 0 +#endif // defined(__APPLE__) +// Unix +#define _ELPP_OS_UNIX ((_ELPP_OS_LINUX || _ELPP_OS_MAC) && (!_ELPP_OS_WINDOWS)) +// Assembly +#if (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))) || \ + (defined(_MSC_VER) && (defined(_M_IX86))) +# define _ELPP_ASSEMBLY_SUPPORTED 1 +#else +# define _ELPP_ASSEMBLY_SUPPORTED 0 +#endif +#if (!defined(_DISABLE_ELPP_ASSERT)) +# if (defined(_STOP_ON_FIRST_ELPP_ASSERTION)) +# define __EASYLOGGINGPP_ASSERT(expr, msg) if (!(expr)) { std::cerr << "EASYLOGGING++ ASSERTION FAILED (LINE: " << __LINE__ << ") [" #expr << "] with message \"" << msg << "\"" << std::endl; exit(1); } +# else +# define __EASYLOGGINGPP_ASSERT(expr, msg) if (!(expr)) { std::cerr << "EASYLOGGING++ ASSERTION FAILED (LINE: " << __LINE__ << ") [" #expr << "] with message \"" << msg << "\"" << std::endl; } +# endif // (defined(_STOP_ON_FIRST_ELPP_ASSERTION)) +#else +# define __EASYLOGGINGPP_ASSERT(x, y) +#endif // (!defined(_DISABLE_ELPP_ASSERT)) +#define __EASYLOGGINGPP_SUPPRESS_UNSED(x) (void)x; +#if _ELPP_OS_UNIX +// Log file permissions for unix-based systems +# define _LOG_PERMS S_IRUSR | S_IWUSR | S_IXUSR | S_IWGRP | S_IRGRP | S_IXGRP | S_IWOTH | S_IXOTH +#endif // _ELPP_OS_UNIX +#if (!defined(_DISABLE_MUTEX) && (_ENABLE_EASYLOGGING)) +# define _ELPP_ENABLE_MUTEX 1 +#else +# define _ELPP_ENABLE_MUTEX 0 +#endif // (!defined(_DISABLE_MUTEX) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_DEBUG_LOGS) && (_ENABLE_EASYLOGGING) && ((defined(_DEBUG)) || (!defined(NDEBUG)))) +# define _ELPP_DEBUG_LOG 1 +#else +# define _ELPP_DEBUG_LOG 0 +#endif // (!defined(_DISABLE_DEBUG_LOGS) && (_ENABLE_EASYLOGGING) && ((defined(_DEBUG)) || (!defined(NDEBUG)))) +#if (!defined(_DISABLE_INFO_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_INFO_LOG 1 +#else +# define 
_ELPP_INFO_LOG 0 +#endif // (!defined(_DISABLE_INFO_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_WARNING_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_WARNING_LOG 1 +#else +# define _ELPP_WARNING_LOG 0 +#endif // (!defined(_DISABLE_WARNING_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_ERROR_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_ERROR_LOG 1 +#else +# define _ELPP_ERROR_LOG 0 +#endif // (!defined(_DISABLE_ERROR_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_FATAL_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_FATAL_LOG 1 +#else +# define _ELPP_FATAL_LOG 0 +#endif // (!defined(_DISABLE_FATAL_LOGS) && (_ENABLE_EASYLOGGING)) +#if (defined(_QUALITY_ASSURANCE) && (_ENABLE_EASYLOGGING)) +# define _ELPP_QA_LOG 1 +#else +# define _ELPP_QA_LOG 0 +#endif // (defined(_QUALITY_ASSURANCE) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_TRACE_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_TRACE_LOG 1 +#else +# define _ELPP_TRACE_LOG 0 +#endif // (!defined(_DISABLE_TRACE_LOGS) && (_ENABLE_EASYLOGGING)) +#if (!defined(_DISABLE_VERBOSE_LOGS) && (_ENABLE_EASYLOGGING)) +# define _ELPP_VERBOSE_LOG 1 +#else +# define _ELPP_VERBOSE_LOG 0 +#endif // (!defined(_DISABLE_VERBOSE_LOGS) && (_ENABLE_EASYLOGGING)) +#define ELPP_FOR_EACH(variableName, initialValue, operation, limit) unsigned int variableName = initialValue; \ + do { \ + operation \ + variableName = variableName << 1; \ + if (variableName == 0) { ++variableName; } \ + } while (variableName <= limit) +#define ELPP_FOR_EACH_LEVEL(variableName, initialValue, operation) \ + ELPP_FOR_EACH(variableName, initialValue, operation, easyloggingpp::Level::kMaxValid) +#define ELPP_FOR_EACH_CONFIGURATION(variableName, initialValue, operation) \ + ELPP_FOR_EACH(variableName, initialValue, operation, easyloggingpp::ConfigurationType::kMaxValid) +// Includes +#include +#include +#include +#include +#include +#if _ELPP_NDK +# include +#endif // _ELPP_NDK +#if _ELPP_OS_UNIX +# include +# include +# if 
(_ELPP_ENABLE_MUTEX) +# if (_ELPP_ASSEMBLY_SUPPORTED) +# include +# else +# include +# endif // (_ELPP_ASSEMBLY_SUPPORTED) +# endif // (_ELPP_ENABLE_MUTEX) +#elif _ELPP_OS_WINDOWS +# include +# include +#endif // _ELPP_OS_UNIX +#include +#include +#include +#include +#include +#include +#include +#include +#if (_ELPP_STD_THREAD_AVAILABLE) +# include +#endif // _ELPP_STD_THREAD_AVAILABLE +#if defined(_ELPP_STL_LOGGING) +// For logging STL based templates +# include +# include +# include +# include +# include +# include +# include +#endif // defined(_ELPP_STL_LOGGING) +#if defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) +// For logging Qt based classes & templates +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif // defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) +namespace easyloggingpp { +namespace internal { + +class NoCopy { +protected: + NoCopy(void) {} +private: + NoCopy(const NoCopy&); + NoCopy& operator=(const NoCopy&); +}; + +class StaticClass { +private: + StaticClass(void); + StaticClass(const StaticClass&); + StaticClass& operator=(const StaticClass&); +}; +} // namespace internal + +struct Level : private internal::StaticClass { +public: + enum { + All = 0, Debug = 1, Info = 2, Warning = 4, Error = 8, + Fatal = 16, Verbose = 32, QA = 64, Trace = 128, Unknown = 1010 + }; + + static const unsigned int kMinValid = All; + static const unsigned int kMaxValid = Trace; + + static std::string convertToString(unsigned int level_) { + switch (level_) { + case All: + return std::string("ALL"); + case Debug: + return std::string("DEBUG"); + case Info: + return std::string("INFO"); + case Warning: + return std::string("WARNING"); + case Error: + return std::string("ERROR"); + case Fatal: + return std::string("FATAL"); + case QA: + return std::string("QA"); + case Verbose: + return std::string("VERBOSE"); + case Trace: + return std::string("TRACE"); + default: + return 
std::string("UNKNOWN"); + } + } + + static unsigned int convertFromString(const std::string& levelStr) { + if (levelStr == "all" || levelStr == "ALL") return Level::All; + if (levelStr == "debug" || levelStr == "DEBUG") return Level::Debug; + if (levelStr == "info" || levelStr == "INFO") return Level::Info; + if (levelStr == "warning" || levelStr == "WARNING") return Level::Warning; + if (levelStr == "error" || levelStr == "ERROR") return Level::Error; + if (levelStr == "fatal" || levelStr == "FATAL") return Level::Fatal; + if (levelStr == "qa" || levelStr == "QA") return Level::QA; + if (levelStr == "verbose" || levelStr == "VERBOSE") return Level::Verbose; + if (levelStr == "trace" || levelStr == "TRACE") return Level::Trace; + return Level::Unknown; + } +}; + +struct ConfigurationType : private internal::StaticClass { +public: + enum { + Enabled = 0, ToFile = 1, ToStandardOutput = 2, Format = 4, Filename = 8, + MillisecondsWidth = 16, PerformanceTracking = 32, RollOutSize = 64, Unknown = 1010 + }; + + static const unsigned int kMinValid = Enabled; + static const unsigned int kMaxValid = RollOutSize; + + static std::string convertToString(unsigned int configurationType_) { + switch (configurationType_) { + case Enabled: + return std::string("ENABLED"); + case Filename: + return std::string("FILENAME"); + case Format: + return std::string("FORMAT"); + case ToFile: + return std::string("TO_FILE"); + case ToStandardOutput: + return std::string("TO_STANDARD_OUTPUT"); + case MillisecondsWidth: + return std::string("MILLISECONDS_WIDTH"); + case PerformanceTracking: + return std::string("PERFORMANCE_TRACKING"); + case RollOutSize: + return std::string("ROLL_OUT_SIZE"); + default: return std::string("UNKNOWN"); + } + } + + static unsigned int convertFromString(const std::string& configStr) { + if (configStr == "enabled" || configStr == "ENABLED") return ConfigurationType::Enabled; + if (configStr == "to_file" || configStr == "TO_FILE") return ConfigurationType::ToFile; + 
if (configStr == "to_standard_output" || configStr == "TO_STANDARD_OUTPUT") return ConfigurationType::ToStandardOutput; + if (configStr == "format" || configStr == "FORMAT") return ConfigurationType::Format; + if (configStr == "filename" || configStr == "FILENAME") return ConfigurationType::Filename; + if (configStr == "milliseconds_width" || configStr == "MILLISECONDS_WIDTH") return ConfigurationType::MillisecondsWidth; + if (configStr == "performance_tracking" || configStr == "PERFORMANCE_TRACKING") return ConfigurationType::PerformanceTracking; + if (configStr == "roll_out_size" || configStr == "ROLL_OUT_SIZE") return ConfigurationType::RollOutSize; + return ConfigurationType::Unknown; + } +}; + +namespace internal { +struct Aspect : private internal::StaticClass { +public: + enum { + Normal = 0, Conditional = 1, Interval = 2 + }; +}; + +//! +//! Used internally. You should not need this class. +//! +class Constants : private internal::NoCopy { +public: + Constants (void) : + // + // Log level name outputs + // + LOG_INFO_LEVEL_VALUE ("INFO") , + LOG_DEBUG_LEVEL_VALUE ("DEBUG"), + LOG_WARNING_LEVEL_VALUE("WARN"), + LOG_ERROR_LEVEL_VALUE ("ERROR"), + LOG_FATAL_LEVEL_VALUE ("FATAL"), + LOG_VERBOSE_LEVEL_VALUE("VER"), + LOG_QA_LEVEL_VALUE ("QA"), + LOG_TRACE_LEVEL_VALUE ("TRACE"), + // + // Format specifiers + // + APP_NAME_FORMAT_SPECIFIER ("%app"), + LOGGER_ID_FORMAT_SPECIFIER ("%logger"), + THREAD_ID_FORMAT_SPECIFIER ("%thread"), + LEVEL_FORMAT_SPECIFIER ("%level"), + DATE_ONLY_FORMAT_SPECIFIER ("%date"), + TIME_ONLY_FORMAT_SPECIFIER ("%time"), + DATE_TIME_FORMAT_SPECIFIER ("%datetime"), + LOCATION_FORMAT_SPECIFIER ("%loc"), + FUNCTION_FORMAT_SPECIFIER ("%func"), + USER_FORMAT_SPECIFIER ("%user"), + HOST_FORMAT_SPECIFIER ("%host"), + LOG_MESSAGE_FORMAT_SPECIFIER ("%log"), + VERBOSE_LEVEL_FORMAT_SPECIFIER ("%vlevel"), + // + // Others + // + NULL_POINTER ("nullptr"), + FORMAT_SPECIFIER_ESCAPE_CHAR ('E'), + MAX_LOG_PER_CONTAINER (100), + MAX_LOG_PER_COUNTER 
(100000), + DEFAULT_MILLISECOND_OFFSET (1000), + MAX_VERBOSE_LEVEL (9), + CURRENT_VERBOSE_LEVEL (0), // Set dynamically from registeredLoggers +#if _ELPP_OS_UNIX + PATH_SLASH ("/"), +#elif _ELPP_OS_WINDOWS + PATH_SLASH ("\\"), +#endif // _ELPP_OS_UNIX, + DEFAULT_LOG_FILENAME ("myeasylog.log") + { + // Trivial logger configuration - only to set format (difference: not using %logger) + std::stringstream ss; + ss << " * ALL:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* DEBUG:\n"; + ss << " FORMAT = %datetime %level [%user@%host] [%func] [%loc] %log\n"; + // INFO and WARNING uses is defined by ALL + ss << "* ERROR:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* FATAL:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* QA:\n"; + ss << " FORMAT = %datetime %level %log\n"; + ss << "* VERBOSE:\n"; + ss << " FORMAT = %datetime %level-%vlevel %log\n"; + ss << "* TRACE:\n"; + ss << " FORMAT = %datetime %level [%func] [%loc] %log\n"; + DEFAULT_LOGGER_CONFIGURATION = ss.str(); + } // C'tor + // + // Log level name outputs + // + const std::string LOG_INFO_LEVEL_VALUE; + const std::string LOG_DEBUG_LEVEL_VALUE; + const std::string LOG_WARNING_LEVEL_VALUE; + const std::string LOG_ERROR_LEVEL_VALUE; + const std::string LOG_FATAL_LEVEL_VALUE; + const std::string LOG_VERBOSE_LEVEL_VALUE; + const std::string LOG_QA_LEVEL_VALUE; + const std::string LOG_TRACE_LEVEL_VALUE; + // + // Format specifiers + // + const std::string APP_NAME_FORMAT_SPECIFIER; + const std::string LOGGER_ID_FORMAT_SPECIFIER; + const std::string THREAD_ID_FORMAT_SPECIFIER; + const std::string LEVEL_FORMAT_SPECIFIER; + const std::string DATE_ONLY_FORMAT_SPECIFIER; + const std::string TIME_ONLY_FORMAT_SPECIFIER; + const std::string DATE_TIME_FORMAT_SPECIFIER; + const std::string LOCATION_FORMAT_SPECIFIER; + const std::string FUNCTION_FORMAT_SPECIFIER; + const std::string USER_FORMAT_SPECIFIER; + const std::string HOST_FORMAT_SPECIFIER; + const std::string 
LOG_MESSAGE_FORMAT_SPECIFIER; + const std::string VERBOSE_LEVEL_FORMAT_SPECIFIER; + // + // Others + // + const std::string NULL_POINTER; + const char FORMAT_SPECIFIER_ESCAPE_CHAR; + const unsigned int MAX_LOG_PER_CONTAINER; + const unsigned int MAX_LOG_PER_COUNTER; + const unsigned int DEFAULT_MILLISECOND_OFFSET; + const int MAX_VERBOSE_LEVEL; + int CURRENT_VERBOSE_LEVEL; + const std::string PATH_SLASH; + const std::string DEFAULT_LOG_FILENAME; + std::string DEFAULT_LOGGER_CONFIGURATION; + + enum kFormatFlags { + kDateOnly = 2, + kTimeOnly = 4, + kDateTime = 8, + kLoggerId = 16, + kLocation = 32, + kFunction = 64, + kUser = 128, + kHost = 256, + kLogMessage = 512, + kVerboseLevel = 1024, + kAppName = 2048, + kThreadId = 4096 + }; +}; // class Constants +namespace threading { + +//! +//! To take care of shared resources in multi-threaded application. Used internally, you should not need it. +//! +class Mutex { +public: +#if _ELPP_ASSEMBLY_SUPPORTED +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +# define _ELPP_MUTEX_LOCK_GNU_ASM(lf_, old_) "movl $1,%%eax\n" \ + "\txchg %%eax,%0\n" \ + "\tmovl %%eax,%1\n" \ + "\t" : "=m" (lf_), "=m" (old_) : : "%eax", "memory" +# define _ELPP_MUTEX_UNLOCK_GNU_ASM(lf_) "movl $0,%%eax\n" \ + "\txchg %%eax,%0\n" \ + "\t" : "=m" (lf_) : : "%eax", "memory" +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + Mutex(void) : lockerFlag_(0) { + } +#else + Mutex(void) { +# if _ELPP_OS_UNIX + pthread_mutex_init(&underlyingMutex_, NULL); +# elif _ELPP_OS_WINDOWS + InitializeCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX + } + + virtual ~Mutex(void) { +# if _ELPP_OS_UNIX + pthread_mutex_destroy(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + DeleteCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX + } +#endif // _ELPP_ASSEMBLY_SUPPORTED + + inline void lock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED + bool locked = false; + while (!locked) { + locked = tryLock(); + if (!locked) 
{ +# if _ELPP_OS_UNIX + sched_yield(); +# elif _ELPP_OS_WINDOWS + Sleep(0); +# endif + } + } +#else +# if _ELPP_OS_UNIX + pthread_mutex_lock(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + EnterCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } + + inline bool tryLock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED + int oldLock_; +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile (_ELPP_MUTEX_LOCK_GNU_ASM(lockerFlag_, oldLock_)); +# elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &lockerFlag_; + __asm { + mov eax,1 + mov ecx,ptrLock + xchg eax,[ecx] + mov oldLock_,eax + } +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + return (oldLock_ == 0); +#else +# if _ELPP_OS_UNIX + return (pthread_mutex_trylock(&underlyingMutex_) == 0) ? true : false; +# elif _ELPP_OS_WINDOWS + return TryEnterCriticalSection(&underlyingMutex_) ? true : false; +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } + + inline void unlock(void) { +#if _ELPP_ASSEMBLY_SUPPORTED +# if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) + asm volatile (_ELPP_MUTEX_UNLOCK_GNU_ASM(lockerFlag_)); +# elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) + int *ptrLock = &lockerFlag_; + __asm { + mov eax,0 + mov ecx,ptrLock + xchg eax,[ecx] + } +# endif // defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) +#else +# if _ELPP_OS_UNIX + pthread_mutex_unlock(&underlyingMutex_); +# elif _ELPP_OS_WINDOWS + LeaveCriticalSection(&underlyingMutex_); +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED + } +private: +#if _ELPP_ASSEMBLY_SUPPORTED + int lockerFlag_; +#else +# if _ELPP_OS_UNIX + pthread_mutex_t underlyingMutex_; +# elif _ELPP_OS_WINDOWS + CRITICAL_SECTION underlyingMutex_; +# endif // _ELPP_OS_UNIX +#endif // _ELPP_ASSEMBLY_SUPPORTED +}; // class Mutex +//! +//! 
Scoped mutex that works same as C++11 std::lock_guard. Used internally, you should not use it. +//! +class ScopedLock : private internal::NoCopy { +public: + explicit ScopedLock(Mutex& m_) { + mutex_ = &m_; + mutex_->lock(); + } + + virtual ~ScopedLock(void) { + mutex_->unlock(); + } +private: + Mutex* mutex_; + ScopedLock(void); +}; // class ScopedLock + +//! +//! \return ID of current thread. If std::thread is available it uses get_id() otherwise if on windows it uses +//! GetCurrentThreadId() otherwise empty string. Used internally, you should not use it. +//! +inline std::string getCurrentThreadId(void) { + std::stringstream ss; +#if (_ELPP_STD_THREAD_AVAILABLE) + ss << std::this_thread::get_id(); +#else +# if (_ELPP_OS_WINDOWS) + ss << GetCurrentThreadId(); +# endif // (_ELPP_OS_WINDOWS) +#endif + return ss.str(); +} + +} // namespace threading +namespace utilities { + +template +inline void safeDelete(T*& pointer, bool checkNullity = true) { + if (checkNullity && pointer == NULL) return; + delete pointer; + pointer = NULL; +} + +//! +//! String utilities class used internally. You should not use it. +//! 
+class StringUtils : private internal::StaticClass { +public: + static inline std::string trim(const std::string &str) { + std::size_t s = str.find_first_not_of(" \n\r\t"); + std::size_t e = str.find_last_not_of(" \n\r\t"); + if ((s == std::string::npos) || (e == std::string::npos)) { + return ""; + } + else { + return str.substr(s, e - s + 1); + } + } + + static inline bool startsWith(const std::string& str, const std::string& start) { + return (str.length() >= start.length()) && (str.compare(0, start.length(), start) == 0); + } + + static inline bool endsWith(const std::string& str, const std::string& end) { + return (str.length() >= end.length()) && (str.compare(str.length() - end.length(), end.length(), end) == 0); + } + + static inline std::vector& split(const std::string& s, char delim, std::vector& elems) { + std::stringstream ss(s); + std::string item; + while (std::getline(ss, item, delim)) { + elems.push_back(item); + } + return elems; + } + + static inline std::string replaceAll(const std::string& str, const std::string& replaceWhat, const std::string& replaceWith) { + if (replaceWhat == replaceWith) + return str; + std::string result = str; + std::size_t foundAt = std::string::npos; + while ((foundAt = result.find(replaceWhat)) != std::string::npos) { + result.replace(foundAt, replaceWhat.length(), replaceWith); + } + return result; + } + + static inline std::string stripAllWhiteSpaces(const std::string& str) { + std::string result = replaceAll(str, " ", ""); + result = replaceAll(result, "\n", ""); + result = replaceAll(result, "\r", ""); + result = replaceAll(result, "\t", ""); + return result; + } + + static inline void tolower(std::string& str) { + std::transform(str.begin(), str.end(), str.begin(), ::tolower); + } +}; + +//! +//! Operating System utilities class used internally. You should not use it. +//! 
+class OSUtils : private internal::StaticClass { +public: +#if _ELPP_OS_WINDOWS + static const char* getWindowsEnvironmentVariable(const char* variableName) { + const DWORD bufferLen = 50; + static char buffer[bufferLen]; + if (GetEnvironmentVariableA(variableName, buffer, bufferLen)) { + return buffer; + } + return NULL; + } +#endif // _ELPP_OS_WINDOWS +#if _ELPP_NDK + static std::string getProperty(const char* prop) { + char propVal[PROP_VALUE_MAX + 1]; + __system_property_get(prop, propVal); + return std::string(propVal); + } + + static std::string getDeviceName(void) { + std::stringstream ss; + std::string manufacturer = getProperty("ro.product.manufacturer"); + std::string model = getProperty("ro.product.model"); + if (manufacturer.empty() && model.empty()) { + return std::string(); + } + ss << manufacturer << " " << model; + return ss.str(); + } +#endif // _ELPP_NDK + // Runs command on terminal and returns the output. + // This is applicable only on linux and mac, for all other OS, an empty string is returned. 
+ static const std::string getBashOutput(const char* command_) { + if (command_ == NULL) { + return std::string(); + } +#if _ELPP_OS_UNIX && !_ELPP_NDK + FILE* proc = NULL; + if ((proc = popen(command_, "r")) == NULL) { + std::cerr << "\nUnable to run command [" << command_ << "]" << std::endl; + return std::string(); + } + char hBuff[4096]; + if (fgets(hBuff, sizeof(hBuff), proc) != NULL) { + pclose(proc); + if (hBuff[strlen(hBuff) - 1] == '\n') { + hBuff[strlen(hBuff) - 1] = '\0'; + } + return std::string(hBuff); + } + return std::string(); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + static std::string getEnvironmentVariable(const char* variableName, const char* defaultVal, const char* alternativeBashCommand = NULL) { +#if _ELPP_OS_UNIX + const char* val = getenv(variableName); +#elif _ELPP_OS_WINDOWS + const char* val = getWindowsEnvironmentVariable(variableName); +#endif // _ELPP_OS_UNIX + if ((val == NULL) || ((strcmp(val, "") == 0))) { +#if _ELPP_OS_UNIX + // Try harder on unix-based systems + std::string valBash = internal::utilities::OSUtils::getBashOutput(alternativeBashCommand); + if (valBash.empty()) { + return std::string(defaultVal); + } else { + return valBash; + } +#elif _ELPP_OS_WINDOWS + return std::string(defaultVal); +#endif // _ELPP_OS_WINDOWS + } + return std::string(val); + } + + // Gets current username. + static const std::string currentUser(void) { +#if _ELPP_OS_UNIX && !_ELPP_NDK + return getEnvironmentVariable("USER", "user", "whoami"); +#elif _ELPP_OS_WINDOWS + return getEnvironmentVariable("USERNAME", "user"); +#elif _ELPP_NDK + return std::string("android"); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + // Gets current host name or computer name. 
+ static const std::string currentHost(void) { +#if _ELPP_OS_UNIX && !_ELPP_NDK + return getEnvironmentVariable("HOSTNAME", "unknown-host", "hostname"); +#elif _ELPP_OS_WINDOWS + return getEnvironmentVariable("COMPUTERNAME", "unknown-host"); +#elif _ELPP_NDK + return getDeviceName(); +#else + return std::string(); +#endif // _ELPP_OS_UNIX + } + + // Determines whether or not provided path_ exist in current file system + static inline bool pathExists(const char* path_) { + if (path_ == NULL) { + return false; + } +#if _ELPP_OS_UNIX + struct stat st; + return (stat(path_, &st) == 0); +#elif _ELPP_OS_WINDOWS + DWORD fileType = GetFileAttributesA(path_); + if (fileType == INVALID_FILE_ATTRIBUTES) { + return false; + } + return (fileType & FILE_ATTRIBUTE_DIRECTORY) == 0 ? false : true; +#endif // _ELPP_OS_UNIX + } + + // Creates path as specified + static bool createPath(const std::string& path_) { + if (path_.empty()) { + return false; + } + if (internal::utilities::OSUtils::pathExists(path_.c_str())) { + return true; + } +#if _ELPP_OS_UNIX + const char* pathDelim_ = "/"; +#elif _ELPP_OS_WINDOWS + char pathDelim_[] = "\\"; +#endif // _ELPP_OS_UNIX + int status = -1; + + char* currPath_ = const_cast(path_.c_str()); + std::string buildingPath_ = std::string(); +#if _ELPP_OS_UNIX + if (path_[0] == '/') { + buildingPath_ = "/"; + } + currPath_ = STRTOK(currPath_, pathDelim_, 0); +#elif _ELPP_OS_WINDOWS + // Use secure functions API + char* nextTok_; + currPath_ = STRTOK(currPath_, pathDelim_, &nextTok_); +#endif // _ELPP_OS_UNIX + while (currPath_ != NULL) { + buildingPath_.append(currPath_); + buildingPath_.append(pathDelim_); +#if _ELPP_OS_UNIX + status = mkdir(buildingPath_.c_str(), _LOG_PERMS); + currPath_ = STRTOK(NULL, pathDelim_, 0); +#elif _ELPP_OS_WINDOWS + status = _mkdir(buildingPath_.c_str()); + currPath_ = STRTOK(NULL, pathDelim_, &nextTok_); +#endif // _ELPP_OS_UNIX + } + if (status == -1) { + return false; + } + return true; + } + + static std::string 
getPathFromFilename(const std::string& fullPath_, internal::Constants* constants_) { + if (fullPath_ == "" || fullPath_.find(constants_->PATH_SLASH) == std::string::npos) { + return fullPath_; + } + std::size_t lastSlashAt = fullPath_.find_last_of(constants_->PATH_SLASH); + if (lastSlashAt == 0) { + return constants_->PATH_SLASH; + } + return fullPath_.substr(0, lastSlashAt + 1); + } +}; // class OSUtils + +//! +//! Contains static functions related to log manipulation used internally. You should not use it. +//! +class LogManipulator : private internal::StaticClass { +public: + // Updates the formatSpecifier_ for currentFormat_ to value_ provided + static void updateFormatValue(const std::string& formatSpecifier_, + const std::string& value_, std::string& currentFormat_, + internal::Constants* constants_) { + std::size_t foundAt = std::string::npos; + while ((foundAt = currentFormat_.find(formatSpecifier_, foundAt + 1)) != std::string::npos){ + if (currentFormat_[foundAt > 0 ? foundAt - 1 : 0] == constants_->FORMAT_SPECIFIER_ESCAPE_CHAR) { + currentFormat_.erase(foundAt > 0 ? foundAt - 1 : 0, 1); + ++foundAt; + } else { + currentFormat_ = currentFormat_.replace(foundAt, formatSpecifier_.size(), value_); + return; + } + } + } +}; // class LogManipulator + +//! +//! Contains utility functions related to date/time used internally. You should not use it. +//! 
+class DateUtils : private internal::StaticClass { +public: +#if _ELPP_OS_WINDOWS + static void gettimeofday(struct timeval *tv) { + if (tv != NULL) { +# if defined(_MSC_EXTENSIONS) + const unsigned __int64 delta_ = 11644473600000000Ui64; +# else + const unsigned __int64 delta_ = 11644473600000000ULL; +# endif // defined(_MSC_EXTENSIONS) + const double secOffSet = 0.000001; + const unsigned long usecOffSet = 1000000; + FILETIME fileTime_; + GetSystemTimeAsFileTime(&fileTime_); + unsigned __int64 present_ = 0; + present_ |= fileTime_.dwHighDateTime; + present_ = present_ << 32; + present_ |= fileTime_.dwLowDateTime; + present_ /= 10; // mic-sec + // Subtract the difference + present_ -= delta_; + tv->tv_sec = static_cast(present_ * secOffSet); + tv->tv_usec = static_cast(present_ % usecOffSet); + } + } +#endif // _ELPP_OS_WINDOWS + + // Gets current date and time with milliseconds. + static std::string getDateTime(const std::string& bufferFormat_, unsigned int type_, internal::Constants* constants_, std::size_t milliSecondOffset_ = 1000) { + long milliSeconds = 0; + const int kDateBuffSize_ = 30; + char dateBuffer_[kDateBuffSize_] = ""; + char dateBufferOut_[kDateBuffSize_] = ""; +#if _ELPP_OS_UNIX + bool hasTime_ = ((type_ & constants_->kDateTime) || (type_ & constants_->kTimeOnly)); + timeval currTime; + gettimeofday(&currTime, NULL); + if (hasTime_) { + milliSeconds = currTime.tv_usec / milliSecondOffset_ ; + } + struct tm * timeInfo = localtime(&currTime.tv_sec); + strftime(dateBuffer_, sizeof(dateBuffer_), bufferFormat_.c_str(), timeInfo); + if (hasTime_) { + SPRINTF(dateBufferOut_, "%s.%03ld", dateBuffer_, milliSeconds); + } else { + SPRINTF(dateBufferOut_, "%s", dateBuffer_); + } +#elif _ELPP_OS_WINDOWS + const char* kTimeFormatLocal_ = "HH':'mm':'ss"; + const char* kDateFormatLocal_ = "dd/MM/yyyy"; + if ((type_ & constants_->kDateTime) || (type_ & constants_->kDateOnly)) { + if (GetDateFormatA(LOCALE_USER_DEFAULT, 0, 0, kDateFormatLocal_, dateBuffer_, 
kDateBuffSize_) != 0) { + SPRINTF(dateBufferOut_, "%s", dateBuffer_); + } + } + if ((type_ & constants_->kDateTime) || (type_ & constants_->kTimeOnly)) { + if (GetTimeFormatA(LOCALE_USER_DEFAULT, 0, 0, kTimeFormatLocal_, dateBuffer_, kDateBuffSize_) != 0) { + milliSeconds = static_cast(GetTickCount()) % milliSecondOffset_; + if (type_ & constants_->kDateTime) { + SPRINTF(dateBufferOut_, "%s %s.%03ld", dateBufferOut_, dateBuffer_, milliSeconds); + } else { + SPRINTF(dateBufferOut_, "%s.%03ld", dateBuffer_, milliSeconds); + } + } + } +#endif // _ELPP_OS_UNIX + return std::string(dateBufferOut_); + } + + static std::string formatMilliSeconds(double milliSeconds_) { + double result = milliSeconds_; + std::string unit = "ms"; + std::stringstream stream_; + if (result > 1000.0f) { + result /= 1000; unit = "seconds"; + if (result > 60.0f) { + result /= 60; unit = "minutes"; + if (result > 60.0f) { + result /= 60; unit = "hours"; + if (result > 24.0f) { + result /= 24; unit = "days"; + } + } + } + } + stream_ << result << " " << unit; + return stream_.str(); + } + + static inline double getTimeDifference(const timeval& endTime_, const timeval& startTime_) { + return static_cast((((endTime_.tv_sec - startTime_.tv_sec) * 1000000) + (endTime_.tv_usec - startTime_.tv_usec)) / 1000); + } +}; // class DateUtils +} // namespace utilities + +//! +//! Internal repository base to manage memory on heap. Used internally, you should not use it. +//! 
+template +class Registry { +public: + Registry(void) { + } + + virtual ~Registry(void) { + unregisterAll(); + } + + Registry(const Registry& other_) { + if (this != &other_) { + unregisterAll(); + for (std::size_t i = 0; i < other_.list_.size(); ++i) { + Class* curr_ = other_.list_.at(i); + if (curr_) { + list_.push_back(new Class(*curr_)); + } + } + } + } + + Registry& operator=(const Registry& other_) { + if (this == &other_) { + return *this; + } + unregisterAll(); + for (std::size_t i = 0; i < other_.list_.size(); ++i) { + Class* curr_ = other_.list_.at(i); + if (curr_) { + list_.push_back(new Class(*curr_)); + } + } + return *this; + } + + inline void registerNew(Class* c_) { + list_.push_back(c_); + } + + bool operator!=(const Registry& other_) { + if (list_.size() != other_.list_.size()) { + return true; + } + for (std::size_t i = 0; i < list_.size(); ++i) { + if (list_.at(i) != other_.list_.at(i)) { + return true; + } + } + return false; + } + + bool operator==(const Registry& other_) { + if (list_.size() != other_.list_.size()) { + return false; + } + for (std::size_t i = 0; i < list_.size(); ++i) { + if (list_.at(i) != other_.list_.at(i)) { + return false; + } + } + return true; + } + + template + Class* get(const T& t_) { + Iterator iter = std::find_if(list_.begin(), list_.end(), Predicate(t_)); + if (iter != list_.end() && *iter != NULL) { + return *iter; + } + return NULL; + } + + template + Class* get(const T& t_, const T2& t2_) { + Iterator iter = std::find_if(list_.begin(), list_.end(), Predicate(t_, t2_)); + if (iter != list_.end() && *iter != NULL) { + return *iter; + } + return NULL; + } + + template + inline bool exist(const T& t_) { + return (get(t_) != NULL); + } + + inline std::size_t count(void) const { + return list_.size(); + } + + inline bool empty(void) const { + return list_.empty(); + } + + Class* at(std::size_t i) const { + return list_.at(i); + } + +protected: + typedef typename std::vector::iterator Iterator; + + inline void 
unregisterAll(void) { + if (!empty()) { + std::for_each(list_.begin(), list_.end(), std::bind1st(std::mem_fun(&Registry::release), this)); + list_.clear(); + } + } + + inline void unregister(Class*& c_) { + if (c_) { + Iterator iter = list_.begin(); + for (; iter != list_.end(); ++iter) { + if (c_ == *iter) { + break; + } + } + if (iter != list_.end() && *iter != NULL) { + list_.erase(iter); + internal::utilities::safeDelete(c_); + } + } + } + + inline std::vector& list(void) { + return list_; + } +private: + std::vector list_; + + inline void release(Class* c_) { + internal::utilities::safeDelete(c_); + } +}; // class Registry + +//! +//! Scoped pointer used internally. You should not use it. +//! +template +class ScopedPointer { +public: + explicit ScopedPointer(T* ptr_ = 0) : + ptr_(ptr_), referenceCounter_(0) { + referenceCounter_ = new ReferenceCounter(); + referenceCounter_->increment(); + } + + ScopedPointer(const ScopedPointer& scopedPointer_) : + ptr_(scopedPointer_.ptr_), referenceCounter_(scopedPointer_.referenceCounter_) { + referenceCounter_->increment(); + } + + ScopedPointer& operator=(const ScopedPointer& other_) { + if (this != &other_) + { + validate(); + ptr_ = other_.ptr_; + referenceCounter_ = other_.referenceCounter_; + referenceCounter_->increment(); + } + return *this; + } + + virtual ~ScopedPointer(void) { + validate(); + } + + T& operator*(void) { + return *ptr_; + } + + T* operator->(void) { + return ptr_; + } + + T* pointer(void) { + return ptr_; + } + + class ReferenceCounter { + public: + ReferenceCounter(void) : count_(0) { + } + + ReferenceCounter& operator=(const ReferenceCounter& other_) { + if (this != &other_) { + count_ = other_.count_; + } + return *this; + } + + void increment(void) { + ++count_; + } + + int decrement(void) { + return this == NULL ? 
0 : --count_; + } + + private: + int count_; + }; +private: + T* ptr_; + ReferenceCounter* referenceCounter_; + + void validate(void) { + if(referenceCounter_->decrement() == 0) { + internal::utilities::safeDelete(ptr_, false); + internal::utilities::safeDelete(referenceCounter_, false); + } + } +}; + +//! +//! Class that represents single configuration. +//! +//! Single configuration has a level (easyloggingpp::Level), type (easyloggingpp::ConfigurationType) +//! and std::string based value. This value is later parsed into more appropriate data type depending on +//! type +//! +class Configuration { +public: + //! + //! Full constructor used to set initial value of configuration + //! \param level_ + //! \param type_ + //! \param value_ + //! + Configuration(unsigned int level_, unsigned int type_, const std::string& value_) : + level_(level_), + type_(type_), + value_(value_) { + } + + //! + //! \return Level of current configuration + //! + unsigned int level(void) const { + return level_; + } + + //! + //! \return Configuration type of current configuration + //! + unsigned int type(void) const { + return type_; + } + + //! + //! \return String based configuration value + //! + std::string value(void) const { + return value_; + } + + //! + //! Set string based configuration value + //! \param value_ Value to set. Values have to be std::string; For boolean values use "true", "false", for any integral values + //! use them in quotes. They will be parsed when configuring + //! + void setValue(const std::string& value_) { + this->value_ = value_; + } + + //! + //! Predicate used to find configuration from configuration repository. This is used internally. + //! 
+ class Predicate { + public: + Predicate(unsigned int level_, unsigned int type_) : + level_(level_), + type_(type_) { + } + + bool operator()(const Configuration* conf_) { + return ((conf_ != NULL) && (conf_->level() == level_) && (conf_->type() == type_)); + } + + private: + unsigned int level_; + unsigned int type_; + }; +private: + unsigned int level_; + unsigned int type_; + std::string value_; +}; + +} // namespace internal + +//! +//! Configuration repository that represents configuration for single logger +//! +class Configurations : public internal::Registry { +public: + //! + //! Default constructor + //! + Configurations(void) : + isFromFile_(false) { + } + + //! + //! Constructor used to set configurations via configuration file + //! \param configurationFile_ Full path to configuration file + //! \param base_ Configurations to base new configuration repository off. This value is used when you want to use + //! existing Configurations to base all the values and then set rest of configuration via configuration file. + //! + Configurations(const std::string& configurationFile_, Configurations* base_ = NULL) : + configurationFile_(configurationFile_), + isFromFile_(false) { + parseFromFile(configurationFile_, base_); + } + + //! + //! Set configurations based on other configurations + //! \param base_ Pointer to existing configurations. + //! + inline void setFromBase(Configurations* base_) { + if (base_ == NULL || base_ == this) return; + std::for_each(base_->list().begin(), base_->list().end(), std::bind1st(std::mem_fun(&Configurations::set), this)); + } + + //! + //! Checks to see whether specified configuration type exist in this repository + //! \param configurationType_ Configuration type to check against. Use easyloggingpp::ConfigurationType to prevent confusions + //! \return True if exist, false otherwise + //! 
+ inline bool contains(unsigned int configurationType_) { + ELPP_FOR_EACH_CONFIGURATION(i, ConfigurationType::kMinValid, + if (get(i, configurationType_) != NULL) { + return true; + } + ); + return false; + } + + //! + //! Sets configuration for specified level_ and configurationType_. If configuration already exists for specified + //! level and configuration type, value just gets updated. + //! Remember, it is not recommended to set skip_ELPPALL_Check to false unless you know exactly what you are doing + //! \param level_ Level to set configuration for. Use easyloggingpp::Level to prevent confusion + //! \param configurationType_ Configuration type to set configuration against. Use easyloggingpp::ConfigurationType to prevent confusion + //! \param value_ String based configuration value + //! \param skipLEVEL_ALL Determines whether to skip 'easyloggingpp::Level::All'. This is skipped by default because setting + //! 'All' may override configuration. Be careful with this. + //! + void set(unsigned int level_, unsigned int configurationType_, const std::string& value_, bool skipLEVEL_ALL = false) { + if (value_ == "") return; // ignore empty values + if ((configurationType_ == ConfigurationType::PerformanceTracking && level_ != Level::All) || + (configurationType_ == ConfigurationType::MillisecondsWidth && level_ != Level::All)) { + // configurationType_ not applicable for this level_ + return; + } + internal::Configuration* conf_ = get(level_, configurationType_); + if (conf_ == NULL) { + registerNew(new internal::Configuration(level_, configurationType_, value_)); + } else { + // Configuration already there, just update the value! + conf_->setValue(value_); + } + if (!skipLEVEL_ALL && level_ == Level::All) { + setAll(configurationType_, value_, true); + } + } + + //! + //! Parse configuration from file. + //! \param configurationFile_ + //! \param base_Configurations to base new configuration repository off. This value is used when you want to use + //! 
existing Configurations to base all the values and then set rest of configuration via configuration file. + //! \return True if successfully parsed, false otherwise. You may define '_STOP_ON_FIRST_ELPP_ASSERTION' to make sure you + //! do not proceed without successful parse. + //! + bool parseFromFile(const std::string& configurationFile_, Configurations* base_ = NULL) { + setFromBase(base_); + std::ifstream fileStream_(configurationFile_.c_str(), std::ifstream::in); + __EASYLOGGINGPP_ASSERT(fileStream_.is_open(), "Unable to open configuration file [" << configurationFile_ << "] for parsing."); + bool parsedSuccessfully_ = false; + std::string line = std::string(); + unsigned int currLevel = 0; + while (fileStream_.good()) { + std::getline(fileStream_, line); + parsedSuccessfully_ = Parser::parseLine(line, currLevel, this); + __EASYLOGGINGPP_ASSERT(parsedSuccessfully_, "Unable to parse configuration line: " << line); + } + isFromFile_ = true; + return parsedSuccessfully_; + } + + //! + //! Parse configurations from configuration string. This configuration string has same syntax as configuration file contents. Make + //! sure all the necessary new line characters are provided. + //! \param configurationsString + //! \return True if successfully parsed, false otherwise. You may define '_STOP_ON_FIRST_ELPP_ASSERTION' to make sure you + //! do not proceed without successful parse. + //! + bool parseFromText(const std::string& configurationsString) { + bool parsedSuccessfully_ = false; + std::string line = std::string(); + unsigned int currLevel = 0; + std::vector lines; + internal::utilities::StringUtils::split(configurationsString, '\n', lines); + for (std::size_t i = 0; i < lines.size(); ++i) { + line = lines.at(i); + parsedSuccessfully_ = Parser::parseLine(line, currLevel, this); + __EASYLOGGINGPP_ASSERT(parsedSuccessfully_, "Unable to parse configuration line: " << line); + } + isFromFile_ = false; + return parsedSuccessfully_; + } + + //! + //! 
Sets configurations to default configurations set by easylogging++. + //! NOTE: This has nothing to do with Loggers::setDefaultConfigurations - thats completely different thing. This is + //! library's own default format. + //! + void setToDefault(void) { + setAll(ConfigurationType::Enabled, "true"); +#if _ELPP_OS_UNIX +# if _ELPP_NDK + setAll(ConfigurationType::Filename, "/data/local/tmp/myeasylog.txt"); +# else + setAll(ConfigurationType::Filename, "/tmp/logs/myeasylog.log"); +# endif // _ELPP_NDK +#elif _ELPP_OS_WINDOWS + setAll(ConfigurationType::Filename, "logs\\myeasylog.log"); +#endif // _ELPP_OS_UNIX + setAll(ConfigurationType::ToFile, "true"); + setAll(ConfigurationType::ToStandardOutput, "true"); + setAll(ConfigurationType::MillisecondsWidth, "3"); + setAll(ConfigurationType::PerformanceTracking, "false"); + setAll(easyloggingpp::ConfigurationType::RollOutSize, "0"); + setAll(ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Debug, ConfigurationType::Format, "%datetime %level [%logger] [%user@%host] [%func] [%loc] %log"); + // INFO and WARNING are set to default by Level::ALL + set(Level::Error, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Fatal, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Verbose, ConfigurationType::Format, "%datetime %level-%vlevel [%logger] %log"); + set(Level::QA, ConfigurationType::Format, "%datetime %level [%logger] %log"); + set(Level::Trace, ConfigurationType::Format, "%datetime %level [%logger] [%func] [%loc] %log"); + } + + //! + //! Sets configuration for all levels. + //! Remember, it is not recommended to set skip_ELPPALL_Check to false unless you know exactly what you are doing + //! \param configurationType_ + //! \param value_ + //! \param skipLEVEL_ALL Determines whether to skip 'easyloggingpp::Level::All'. This is skipped by default because setting + //! 'All' may override configuration. Be careful with this. + //! 
+ inline void setAll(unsigned int configurationType_, const std::string& value_, bool skipLEVEL_ALL = false) { + if (!skipLEVEL_ALL) { + set(Level::All, configurationType_, value_); + } + ELPP_FOR_EACH_LEVEL(i, Level::Debug, + set(i, configurationType_, value_); + ); + } + + //! + //! Clears the repository. + //! All the configurations are maintained on heap for faster access so if you are sure you will not use this + //! repository and you have configured all the loggers against this or you have used this configuration for all the + //! purposes you need it for, you may retain memory by using this method. If you do not do this, internal memory management + //! does it itself at the end of application execution. + //! + inline void clear(void) { + unregisterAll(); + } + + //! + //! \return Returns configuration file used in parsing this configurations. If this repository was set manually or by text + //! this returns empty string. + //! + std::string configurationFile(void) const { + return configurationFile_; + } + + //! + //! Parser used internally to parse configurations from file or text. You should not need this unless you are working on + //! some tool for EasyLogging++ + //! 
+ class Parser : private internal::StaticClass { + public: + static void ignoreComments(std::string& line) { + std::size_t foundAt = 0; + std::size_t quotesStart = line.find("\""); + std::size_t quotesEnd = std::string::npos; + if (quotesStart != std::string::npos) { + quotesEnd = line.find("\"", quotesStart + 1); + } + if ((foundAt = line.find("//")) != std::string::npos) { + if (foundAt < quotesEnd) { + foundAt = line.find("//", quotesEnd + 1); + } + line = line.substr(0, foundAt); + } + } + + static inline bool isLevel(const std::string& line) { + return internal::utilities::StringUtils::startsWith(line, "*"); + } + + static inline bool isConfig(const std::string& line) { + std::size_t assignment = line.find('='); + return line != "" && + (line[0] >= 65 || line[0] <= 90 || line[0] >= 97 || line[0] <= 122) && + (assignment != std::string::npos) && + (line.size() > assignment); + } + + static inline bool isComment(const std::string& line) { + return internal::utilities::StringUtils::startsWith(line, "//"); + } + + static bool parseLine(std::string& line, unsigned int& currLevel, Configurations* conf) { + std::string currLevelStr = std::string(); + unsigned int currConfig = 0; + std::string currConfigStr = std::string(); + std::string currValue = std::string(); + line = internal::utilities::StringUtils::trim(line); + if (isComment(line)) return true; + ignoreComments(line); + if (line == "") { + // Comment ignored + return true; + } + if (isLevel(line)) { + currLevelStr = internal::utilities::StringUtils::stripAllWhiteSpaces(line); + if (currLevelStr.size() <= 2) { + return true; + } + currLevelStr = currLevelStr.substr(1, currLevelStr.size() - 2); + internal::utilities::StringUtils::tolower(currLevelStr); + currLevel = Level::convertFromString(currLevelStr); + return true; + } + if (isConfig(line)) { + std::size_t assignment = line.find('='); + currConfigStr = line.substr(0, assignment); + currConfigStr = 
internal::utilities::StringUtils::stripAllWhiteSpaces(currConfigStr); + internal::utilities::StringUtils::tolower(currConfigStr); + currConfig = ConfigurationType::convertFromString(currConfigStr); + currValue = line.substr(assignment + 1); + currValue = internal::utilities::StringUtils::trim(currValue); + std::size_t quotesStart = currValue.find("\"", 0); + std::size_t quotesEnd = std::string::npos; + if (quotesStart != std::string::npos) { + quotesEnd = currValue.find("\"", quotesStart + 1); + } + if (quotesStart != std::string::npos && quotesEnd != std::string::npos) { + // Quote provided - check and strip if valid + __EASYLOGGINGPP_ASSERT((quotesStart < quotesEnd), "Configuration error - No ending quote found in [" << currConfigStr << "]"); + __EASYLOGGINGPP_ASSERT((quotesStart + 1 != quotesEnd), "Empty configuration value for [" << currConfigStr << "]"); + if ((quotesStart != quotesEnd) && (quotesStart + 1 != quotesEnd)) { + // Explicit check in case if assertion is disabled + currValue = currValue.substr(quotesStart + 1, quotesEnd - 1); + } + } + } + __EASYLOGGINGPP_ASSERT(currLevel != Level::Unknown, "Unrecognized severity level [" << currLevelStr << "]"); + __EASYLOGGINGPP_ASSERT(currConfig != ConfigurationType::Unknown, "Unrecognized configuration [" << currConfigStr << "]"); + if (currLevel == Level::Unknown || currConfig == ConfigurationType::Unknown) { + return false; // unrecognizable level or config + } + conf->set(currLevel, currConfig, currValue); + return true; + } + }; // class Parser +private: + std::string configurationFile_; + bool isFromFile_; + internal::threading::Mutex mutex_; + + inline void set(internal::Configuration* conf_) { + if (conf_ == NULL) return; + set(conf_->level(), conf_->type(), conf_->value()); + } +}; // class Configurations + +class Loggers; // fwd declaration + +namespace internal { + +class RegisteredLoggers; // fwd declaration +class Writer; // fwd declaration + +//! +//! 
Configuration map used internally for faster access of configuration while executing. +//! +template +class ConfigurationMap { +public: + typedef typename std::pair Entry; + + ConfigurationMap(void) { + table = new Entry*[Level::kMaxValid + 1]; + for (unsigned int i = 0; i < (Level::kMaxValid + 1); ++i) { + table[i] = NULL; + } + count = 0; + } + + const T& get(unsigned int level_, bool forceGetLevel = false) { + if (forceGetLevel || table[level_] != NULL) { + if (table[level_] == NULL) { + return default_; + } + return table[level_]->second; + } else if (table[Level::All] != NULL) { + return table[Level::All]->second; + } + return default_; + } + + void set(unsigned int level_, const T& value) { + // Unset any existing value for this level + unset(level_); + table[level_] = new Entry(level_, value); + ++count; + } + + void unset(unsigned int level_) { + if (table[level_] != NULL) { + internal::utilities::safeDelete(table[level_]); + if (count > 0) + --count; + } + } + + inline bool exist(unsigned int level_) const { + return table[level_] != NULL; + } + + inline bool exist(unsigned int level_, const T& value) { + return get(level_, true) == value; + } + + void clear(void) { + for (unsigned int i = 0; i < (Level::kMaxValid + 1); ++i) { + internal::utilities::safeDelete(table[i]); + } + delete[] table; + count = 0; + } + + virtual ~ConfigurationMap(void) { + clear(); + } + + inline void setDefault(const T& default_) { + this->default_ = default_; + } + + inline std::size_t size(void) const { + return count; + } +private: + Entry** table; + std::size_t count; + T default_; +}; + +//! +//! Configurations used internally that defines data type of each configuration from easyloggingpp::ConfigurationType +//! 
+class TypedConfigurations { +public: + TypedConfigurations(const Configurations& configurations, internal::Constants* constants_) { + this->constants_ = constants_; + this->configurations_ = configurations; + enabledMap_.setDefault(false); + toFileMap_.setDefault(false); + toStandardOutputMap_.setDefault(false); + filenameMap_.setDefault(""); + logFormatMap_.setDefault(""); + dateFormatMap_.setDefault(""); + dateFormatSpecifierMap_.setDefault(""); + millisecondsWidthMap_.setDefault(3); + performanceTrackingMap_.setDefault(false); + fileStreamMap_.setDefault(NULL); + formatFlagMap_.setDefault(0x0); + rollOutSizeMap_.setDefault(0); + parse(configurations); + } + + virtual ~TypedConfigurations(void) { + deleteFileStreams(); + } + + const Configurations& configurations(void) const { + return configurations_; + } +private: + internal::ConfigurationMap enabledMap_; + internal::ConfigurationMap toFileMap_; + internal::ConfigurationMap filenameMap_; + internal::ConfigurationMap toStandardOutputMap_; + internal::ConfigurationMap logFormatMap_; + internal::ConfigurationMap dateFormatMap_; + internal::ConfigurationMap dateFormatSpecifierMap_; + internal::ConfigurationMap millisecondsWidthMap_; + internal::ConfigurationMap performanceTrackingMap_; + internal::ConfigurationMap fileStreamMap_; + internal::ConfigurationMap formatFlagMap_; + internal::ConfigurationMap rollOutSizeMap_; + internal::Constants* constants_; + Configurations configurations_; + + friend class Writer; + friend class easyloggingpp::Loggers; + + inline bool enabled(unsigned int level_) { + return enabledMap_.get(level_); + } + + inline bool toFile(unsigned int level_) { + return toFileMap_.get(level_); + } + + inline const std::string& filename(unsigned int level_) { + return filenameMap_.get(level_); + } + + inline bool toStandardOutput(unsigned int level_) { + return toStandardOutputMap_.get(level_); + } + + inline const std::string& logFormat(unsigned int level_) { + return logFormatMap_.get(level_); + 
} + + inline const std::string& dateFormat(unsigned int level_) { + return dateFormatMap_.get(level_); + } + + inline const std::string& dateFormatSpecifier(unsigned int level_) { + return dateFormatSpecifierMap_.get(level_); + } + + inline int millisecondsWidth(unsigned int level_ = Level::All) { + return millisecondsWidthMap_.get(level_); + } + + inline bool performanceTracking(unsigned int level_ = Level::All) { + return performanceTrackingMap_.get(level_); + } + + inline std::fstream* fileStream(unsigned int level_) { + return fileStreamMap_.get(level_); + } + + inline std::size_t rollOutSize(unsigned int level_) { + return rollOutSizeMap_.get(level_); + } + + inline int formatFlag(unsigned int level_) { + return formatFlagMap_.get(level_); + } + + void parse(const Configurations& configurations_) { + for (std::size_t i = 0; i < configurations_.count(); ++i) { + Configuration* conf = configurations_.at(i); + switch (conf->type()) { + case ConfigurationType::Enabled: + setValue(conf->level(), getBool(conf->value()), enabledMap_); + break; + case ConfigurationType::ToFile: + setValue(conf->level(), getBool(conf->value()), toFileMap_); + break; + case ConfigurationType::Filename: + insertFilename(conf->level(), conf->value()); + break; + case ConfigurationType::ToStandardOutput: + setValue(conf->level(), getBool(conf->value()), toStandardOutputMap_); + break; + case ConfigurationType::Format: + determineFormats(conf->level(), conf->value()); + break; + case ConfigurationType::MillisecondsWidth: + if (conf->level() == Level::All) { + int origVal = getInt(conf->value()); + int msl_; +#if _ELPP_OS_UNIX + switch (origVal) { + case 3: + msl_ = 1000; + break; + case 4: + msl_ = 100; + break; + case 5: + msl_ = 10; + break; + case 6: + msl_ = 1; + break; + default: + msl_ = constants_->DEFAULT_MILLISECOND_OFFSET; + } +#elif _ELPP_OS_WINDOWS + msl_ = 1000; + __EASYLOGGINGPP_SUPPRESS_UNSED(origVal); +#endif // _ELPP_OS_UNIX + setValue(conf->level(), msl_, 
millisecondsWidthMap_); + } + break; + case ConfigurationType::PerformanceTracking: + if (conf->level() == Level::All) { + setValue(conf->level(), getBool(conf->value()), performanceTrackingMap_); + } + break; + case ConfigurationType::RollOutSize: + setValue(conf->level(), static_cast(getULong(conf->value())), rollOutSizeMap_); + unsigned int validLevel_ = 0; + std::string rolloutFilename_ = std::string(); + checkRollOuts(conf->level(), validLevel_, rolloutFilename_); + break; + } + } + } + + void determineFormats(unsigned int level_, const std::string& originalFormat) { + unsigned int formatSpec = 0x0; + if (originalFormat.find(constants_->APP_NAME_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kAppName; + } + if (originalFormat.find(constants_->LOGGER_ID_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLoggerId; + } + if (originalFormat.find(constants_->THREAD_ID_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kThreadId; + } + if (originalFormat.find(constants_->LOCATION_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLocation; + } + if (originalFormat.find(constants_->FUNCTION_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kFunction; + } + if (originalFormat.find(constants_->USER_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kUser; + } + if (originalFormat.find(constants_->HOST_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kHost; + } + if (originalFormat.find(constants_->LOG_MESSAGE_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kLogMessage; + } + if (originalFormat.find(constants_->VERBOSE_LEVEL_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kVerboseLevel; + } + if (originalFormat.find(constants_->DATE_TIME_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kDateTime; + setValue(level_, constants_->DATE_TIME_FORMAT_SPECIFIER, 
dateFormatSpecifierMap_); + } else if (originalFormat.find(constants_->DATE_ONLY_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kDateOnly; + setValue(level_, constants_->DATE_ONLY_FORMAT_SPECIFIER, dateFormatSpecifierMap_); + } else if (originalFormat.find(constants_->TIME_ONLY_FORMAT_SPECIFIER) != std::string::npos) { + formatSpec |= constants_->kTimeOnly; + setValue(level_, constants_->TIME_ONLY_FORMAT_SPECIFIER, dateFormatSpecifierMap_); + } +#if _ELPP_OS_UNIX + const std::string kTimeFormatLocal_ = "%H:%M:%S"; + const std::string kDateFormatLocal_ = "%d/%m/%Y"; + std::string dateFormat; + + if (formatSpec & constants_->kDateOnly) { + dateFormat = kDateFormatLocal_; + } else if (formatSpec & constants_->kTimeOnly) { + dateFormat = kTimeFormatLocal_; + } else { + std::stringstream ss; + ss << kDateFormatLocal_ << " " << kTimeFormatLocal_; + dateFormat = ss.str(); + } + setValue(level_, dateFormat, dateFormatMap_); +#endif // _ELPP_OS_UNIX + setValue(level_, formatSpec, formatFlagMap_); + // Update %level + std::string origFormatCopy = originalFormat; + switch (level_) { + case Level::Debug: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_DEBUG_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Info: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_INFO_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Warning: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_WARNING_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Error: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_ERROR_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Fatal: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + 
constants_->LOG_FATAL_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Verbose: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_VERBOSE_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::QA: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_QA_LEVEL_VALUE, origFormatCopy, constants_); + break; + case Level::Trace: + internal::utilities::LogManipulator::updateFormatValue(constants_->LEVEL_FORMAT_SPECIFIER, + constants_->LOG_TRACE_LEVEL_VALUE, origFormatCopy, constants_); + break; + } + setValue(level_, origFormatCopy + "\n", logFormatMap_); + } + + void deleteFileStreams(void) { + ELPP_FOR_EACH_LEVEL(i, Level::kMinValid, + removeFile(i); + ); + } + + // This is different since we need unique values + void insertFilename(unsigned int level_, const std::string& fname_, bool forceNew = false) { + std::string fnameFull = fname_; + if (internal::utilities::StringUtils::endsWith(fnameFull, constants_->PATH_SLASH)) { + fnameFull.append(constants_->DEFAULT_LOG_FILENAME); + } + std::string path_ = internal::utilities::OSUtils::getPathFromFilename(fnameFull, constants_); + if (path_.size() < fnameFull.size()) { + // Contains path - create it if it does not already exist + internal::utilities::OSUtils::createPath(path_); + } + if (filenameMap_.size() == 0) { + filenameMap_.set(Level::All, fnameFull); + std::fstream *fsAll = newFileStream(fnameFull, forceNew); + if (fsAll != NULL) { + fileStreamMap_.set(Level::All, fsAll); + } + return; + } + ELPP_FOR_EACH_LEVEL(i, Level::kMinValid, + if (filenameMap_.exist(i, fnameFull)) { + return; + } + ); + filenameMap_.set(level_, fnameFull); + // Just before we proceed and create new file stream we check for existing one on same level, + // if we have existing one, we first delete it to prevent memory leak. 
+ std::fstream *fs = fileStreamMap_.get(level_, true); + internal::utilities::safeDelete(fs); + fileStreamMap_.unset(level_); + fs = newFileStream(fnameFull, forceNew); + if (fs != NULL) { + fileStreamMap_.set(level_, fs); + } + } + + template + void setValue(unsigned int level_, const T& value_, internal::ConfigurationMap& map_, bool skipLEVEL_ALL = false) { + if (map_.size() == 0 && !skipLEVEL_ALL) { + map_.set(Level::All, value_); + return; + } + if (map_.exist(static_cast(Level::All), value_)) { + return; + } + map_.set(level_, value_); + } + + std::fstream* newFileStream(const std::string& filename, bool forceNew = false) { + std::fstream *fs = NULL; + if (forceNew) { + fs = new std::fstream(filename.c_str(), std::fstream::out); + } else { + fs = new std::fstream(filename.c_str(), std::fstream::out | std::fstream::app); + } + if (fs->is_open()) { + fs->flush(); + } else { + internal::utilities::safeDelete(fs, false); + std::cerr << "Bad file [" << filename << "]" << std::endl; + return NULL; + } + return fs; + } + + void removeFile(unsigned int level_) { + std::fstream* fs = fileStream(level_); + if (!fs) { + return; + } + if (fs->is_open()) { + fs->close(); + } + internal::utilities::safeDelete(fs, false); + fileStreamMap_.unset(level_); + filenameMap_.unset(level_); + } + + unsigned long getULong(const std::string& confValue_) { + bool valid = true; + std::string trimmedVal = internal::utilities::StringUtils::trim(confValue_); + if (trimmedVal.size() == 0) { + valid = false; + __EASYLOGGINGPP_SUPPRESS_UNSED(valid); + __EASYLOGGINGPP_ASSERT(valid, "Configuration value not a valid integer " << trimmedVal); + } + for (std::size_t i = 0; i < trimmedVal.size(); ++i) { + if (trimmedVal[i] < 48 || trimmedVal[i] > 57) { + valid = false; + break; + } + } + __EASYLOGGINGPP_SUPPRESS_UNSED(valid); + __EASYLOGGINGPP_ASSERT(valid, "Configuration value not a valid integer " << trimmedVal); + return atol(confValue_.c_str()); + } + + inline int getInt(const std::string& 
confValue_) { + return static_cast(getULong(confValue_)); + } + + inline bool getBool(const std::string& confValue_) { + std::string trimmedVal = internal::utilities::StringUtils::trim(confValue_); + return (trimmedVal == "1" || trimmedVal == "true" || trimmedVal == "TRUE"); + } + + std::size_t getSizeOfFile(std::fstream *fs) { + if (!fs) { + return 0; + } + std::streampos currPos = fs->tellg(); + fs->seekg (0, fs->end); + std::size_t size = static_cast(fs->tellg()); + fs->seekg (currPos); + return size; + } + + bool checkRollOuts(unsigned int level_, unsigned int& validLevel_, std::string& fname_) { + std::fstream* fs = fileStream(level_); + std::size_t rollOutSize_ = rollOutSize(level_); + if (rollOutSize_ != 0 && getSizeOfFile(fs) >= rollOutSize_) { + fname_ = filename(level_); +#if defined(_ELPP_INTERNAL_INFO) + std::cout << "Cleaning log file [" << fname_ << "]\n"; +#endif // defined(_ELPP_INTERNAL_INFO) + // Find and reset correct level. By correct level we mean the current + // available level in fileStream because this level_ could actually be using + // configurations from Level::All and you do not want to create a brand new + // stream just because we are rolling log away + validLevel_ = findValidLevel(fileStreamMap_, level_); + forceReinitiateFile(validLevel_, fname_); + return true; + } + return false; + } + + template + inline unsigned int findValidLevel(internal::ConfigurationMap& map_, unsigned int refLevel_) { + return map_.exist(refLevel_) ? refLevel_ : static_cast(Level::All); + } + + inline void forceReinitiateFile(unsigned int level_, const std::string& filename_) { + removeFile(level_); + insertFilename(level_, filename_, true); + } +}; +} // namespace internal + +//! +//! Represents single logger used to write log. +//! +class Logger { +public: + //! + //! Minimal constructor to set logger ID and constants. You should not use this constructor manually, instead use + //! easyloggingpp::Loggers::getLogger + //! 
\param uniqueIdentifier_ Logger ID that you will require to get logger from logger repository + //! \param constants_ Use easyloggingpp::internal::registeredLoggers->constants() + //! + Logger(const std::string& uniqueIdentifier_, internal::Constants* constants_) : + id_(uniqueIdentifier_), + constants_(constants_), + typedConfigurations_(NULL), + stream_(new std::stringstream()) { + Configurations defaultConfs; + defaultConfs.setToDefault(); + configure(defaultConfs); + userConfigurations_ = defaultConfs; + defaultConfs.clear(); + } + + //! + //! Full constructor to set logger ID, constants and configuration. + //! \param uniqueIdentifier_ Logger ID that you will require to get logger from logger repository + //! \param constants_ Use easyloggingpp::internal::registeredLoggers->constants() + //! \param configurations Configurations to set logger against + //! + Logger(const std::string& uniqueIdentifier_, internal::Constants* constants_, const Configurations& configurations) : + id_(uniqueIdentifier_), + constants_(constants_), + typedConfigurations_(NULL), + stream_(new std::stringstream()) { + configure(configurations); + } + + virtual ~Logger(void) { + internal::utilities::safeDelete(typedConfigurations_); + internal::utilities::safeDelete(stream_); + } + + //! + //! \return Logger ID + //! + inline std::string id(void) const { + return id_; + } + + //! + //! Configures logger against specified configurations + //! \param configurations_ + //! 
+ void configure(const Configurations& configurations_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + __EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + // Configuring uses existing configuration as starting point + // and then sets configurations_ as base to prevent losing any + // previous configurations + Configurations base_ = userConfigurations_; + if (userConfigurations_ != configurations_) { + userConfigurations_ = configurations_; + base_.setFromBase(const_cast(&configurations_)); + } + internal::utilities::safeDelete(typedConfigurations_); + typedConfigurations_ = new internal::TypedConfigurations(base_, constants_); + configured_ = true; + } + + //! + //! Reconfigures logger + //! + inline void reconfigure(void) { + configure(this->userConfigurations_); + } + + //! + //! \return Application name for this logger + //! + inline std::string applicationName(void) const { + return applicationName_; + } + + + //! + //! Application name can vary from logger to logger. For example for a library application name may be different. + //! This is whats used later when you use '%app' in log format + //! + inline void setApplicationName(const std::string& applicationName_) { + this->applicationName_ = applicationName_; + } + + //! + //! \return Configurations that this logger is set against + //! + inline Configurations& configurations(void) { + return userConfigurations_; + } + + //! + //! \return Whether or not logger is configured. + //! + inline bool configured(void) const { + return configured_; + } + + //! + //! Predicate used in logger repository to find logger. This is used internally. You should not use it. + //! 
+ class Predicate { + public: + explicit Predicate(const std::string& id_) : + id_(id_) { + } + inline bool operator()(const Logger* logger_) { + return ((logger_ != NULL) && (logger_->id() == id_)); + } + private: + std::string id_; + }; +private: + std::string id_; + internal::Constants* constants_; + Configurations userConfigurations_; + internal::TypedConfigurations* typedConfigurations_; + std::stringstream* stream_; + std::string applicationName_; + bool configured_; + internal::threading::Mutex mutex_; + friend class internal::Writer; + friend class Loggers; + friend class internal::RegisteredLoggers; + + Logger(void); + + std::stringstream* stream(void) { + return stream_; + } + + inline void acquireLock(void) { + mutex_.lock(); + } + + inline void releaseLock(void) { + mutex_.unlock(); + } +}; + +namespace internal { +//! +//! Internal log counter used for interval logging +//! +class LogCounter : private internal::NoCopy { +public: + explicit LogCounter(internal::Constants* constants_) : + file_(""), + line_(0), + position_(1), + constants_(constants_) { + } + + LogCounter(const char* file_, + unsigned long int line_, + internal::Constants* constants_) : + file_(file_), + line_(line_), + position_(1), + constants_(constants_) { + } + + virtual ~LogCounter(void) { + } + + inline void resetLocation(const char* file_, + unsigned long int line_) { + this->file_ = file_; + this->line_ = line_; + } + + inline void reset(std::size_t n_) { + if (position_ >= constants_->MAX_LOG_PER_COUNTER) { + position_ = (n_ >= 1 ? 
constants_->MAX_LOG_PER_COUNTER % n_ : 0); + } + ++position_; + } + + inline const char* file(void) const { + return file_; + } + + inline unsigned long int line(void) const { + return line_; + } + + inline std::size_t position(void) const { + return position_; + } + + class Predicate { + public: + Predicate(const char* file_, unsigned long int line_) + : file_(file_), + line_(line_) { + } + inline bool operator()(const LogCounter* counter_) { + return ((counter_ != NULL) && + (counter_->file_ == file_) && + (counter_->line_ == line_)); + } + + private: + const char* file_; + unsigned long int line_; + }; +private: + const char* file_; + unsigned long int line_; + std::size_t position_; + internal::Constants* constants_; +}; // class LogCounter + +//! +//! Internal LogCounter repository +//! +class RegisteredCounters : public Registry { +public: + bool validate(const char* file_, unsigned long int line_, std::size_t n_, internal::Constants* constants_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + __EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + bool result_ = false; + internal::LogCounter* counter_ = get(file_, line_); + if (counter_ == NULL) { + registerNew(counter_ = new internal::LogCounter(file_, line_, constants_)); + } + if (n_ >= 1 && counter_->position() != 0 && counter_->position() % n_ == 0) { + result_ = true; + } + counter_->reset(n_); + return result_; + } +private: + internal::threading::Mutex mutex_; +}; // class RegisteredCounters + +//! +//! Internal logger repository. You should not access functionalities directly, you should use easyloggingpp::Loggers instead +//! 
+class RegisteredLoggers : public internal::Registry { +public: + RegisteredLoggers(void) : + constants_(new internal::Constants()), + username_(internal::utilities::OSUtils::currentUser()), + hostname_(internal::utilities::OSUtils::currentHost()), + counters_(new internal::RegisteredCounters()) { + defaultConfigurations_.setToDefault(); + Configurations conf; + conf.setToDefault(); + conf.parseFromText(constants_->DEFAULT_LOGGER_CONFIGURATION); + registerNew(new Logger("trivial", constants_, conf)); + registerNew(new Logger("business", constants_)); + registerNew(new Logger("security", constants_)); + Configurations confPerformance; + confPerformance.setToDefault(); + confPerformance.setAll(ConfigurationType::PerformanceTracking, "true"); + registerNew(new Logger("performance", constants_, confPerformance)); + } + + virtual ~RegisteredLoggers(void) { + internal::utilities::safeDelete(constants_); + internal::utilities::safeDelete(counters_); + } + + inline internal::Constants* constants(void) const { + return constants_; + } + + inline RegisteredCounters* counters(void) { + return counters_; + } + + inline bool validateCounter(const char* file_, unsigned long int line_, std::size_t n_) { + return counters_->validate(file_, line_, n_, constants_); + } +private: + internal::Constants* constants_; + std::string username_; + std::string hostname_; + internal::threading::Mutex mutex_; + internal::RegisteredCounters* counters_; + Configurations defaultConfigurations_; + + friend class Writer; + friend class easyloggingpp::Loggers; + + inline const std::string& username(void) { + return username_; + } + + inline const std::string& hostname(void) { + return hostname_; + } + + inline void setDefaultConfigurations(const Configurations& configurations) { + defaultConfigurations_.setFromBase(const_cast(&configurations)); + } + + Logger* get(const std::string& id_, bool forceCreation_ = true) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); + 
__EASYLOGGINGPP_SUPPRESS_UNSED(slock_); +#endif // _ELPP_ENABLE_MUTEX + Logger* logger_ = internal::Registry::get(id_); + if (logger_ == NULL && forceCreation_) { + logger_ = new Logger(id_, constants_, defaultConfigurations_); + registerNew(logger_); + } + return logger_; + } + + inline void unregister(Logger*& logger_) { +#if _ELPP_ENABLE_MUTEX + internal::threading::ScopedLock slock_(mutex_); +#endif // _ELPP_ENABLE_MUTEX + internal::Registry::unregister(logger_); + } + + inline void acquireLock(void) { + mutex_.lock(); + } + + inline void releaseLock(void) { + mutex_.unlock(); + } + + void setApplicationArguments(int argc, char** argv) { + while (argc-- > 0) { + // Look for --v=X argument + if ((strlen(argv[argc]) >= 5) && (argv[argc][0] == '-') && (argv[argc][1] == '-') && + (argv[argc][2] == 'v') && (argv[argc][3] == '=') && (isdigit(argv[argc][4]))) { + // Current argument is --v=X + // where X is a digit between 0-9 + constants_->CURRENT_VERBOSE_LEVEL = atoi(argv[argc] + 4); + } + // Look for -v argument + else if ((strlen(argv[argc]) == 2) && (argv[argc][0] == '-') && (argv[argc][1] == 'v')) { + constants_->CURRENT_VERBOSE_LEVEL = constants_->MAX_VERBOSE_LEVEL; + } + // Look for --verbose argument + else if ((strlen(argv[argc]) == 9) && (argv[argc][0] == '-') && (argv[argc][1] == '-') && + (argv[argc][2] == 'v') && (argv[argc][3] == 'e') && (argv[argc][4] == 'r') && + (argv[argc][5] == 'b') && (argv[argc][6] == 'o') && (argv[argc][7] == 's') && + (argv[argc][8] == 'e')) { + constants_->CURRENT_VERBOSE_LEVEL = constants_->MAX_VERBOSE_LEVEL; + } + } + } + + inline void setApplicationArguments(int argc, const char** argv) { + setApplicationArguments(argc, const_cast(argv)); + } +}; + +extern internal::ScopedPointer registeredLoggers; +#if defined(_ELPP_STL_LOGGING) +namespace workarounds { +// There is workaround needed to loop through some stl containers. 
In order to do that, we need iterable containers +// of same type and provide iterator interface and pass it on to writeIterator(). +// Remember, this is passed by value in constructor so that we dont change original containers. +// This operation is as expensive as O(class_.size()) or O(constants->MAX_LOG_PER_COUNTER) which ever is smaller. + +// +// Abstract IterableContainer template that provides interface for iterable classes of type T +// +template +class IterableContainer { +public: + typedef typename Container::iterator iterator; + typedef typename Container::const_iterator const_iterator; + IterableContainer(void){} + virtual ~IterableContainer(void) {} + iterator begin(void) { return getContainer().begin(); } + iterator end(void) { return getContainer().end(); } + const_iterator begin(void) const { return getContainer().begin(); } + const_iterator end(void) const { return getContainer().end(); } +private: + virtual Container& getContainer(void) = 0; +}; + +// +// Implements IterableContainer and provides iterable std::priority_queue class +// +template, typename Comparator = std::less > +class IterablePriorityQueue : public IterableContainer, public std::priority_queue { +public: + IterablePriorityQueue(std::priority_queue queue_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !queue_.empty()) { + this->push(queue_.top()); + queue_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; + +// +// Implements IterableContainer and provides iterable std::queue class +// +template > +class IterableQueue : public IterableContainer, public std::queue { +public: + IterableQueue(std::queue queue_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !queue_.empty()) { + this->push(queue_.front()); + queue_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; + +// +// Implements 
IterableContainer and provides iterable std::stack class +// +template > +class IterableStack : public IterableContainer, public std::stack { +public: + IterableStack(std::stack stack_) { + std::size_t count_ = 0; + while (++count_ < registeredLoggers->constants()->MAX_LOG_PER_CONTAINER && !stack_.empty()) { + this->push(stack_.top()); + stack_.pop(); + } + } +private: + inline Container& getContainer(void) { + return this->c; + } +}; +} // namespace workarounds +#endif //defined(_ELPP_STL_LOGGING) + +#define _ELPP_STREAM(l) (*(l->stream())) + +class NullWriter : private internal::NoCopy { +public: + NullWriter(void) {} + + template + inline NullWriter& operator<<(const T&) { + return *this; + } +}; + +class Writer : private internal::NoCopy { +public: + Writer(const std::string& loggerId_, + unsigned int aspect_, + unsigned int severity_, + const char* func_, + const char* file_, + const unsigned long int line_, + bool condition_ = true, + int verboseLevel_ = 0, + int counter_ = 0) : + aspect_(aspect_), + severity_(severity_), + func_(func_), + file_(file_), + line_(line_), + condition_(condition_), + verboseLevel_(verboseLevel_), + counter_(counter_), + proceed_(true) { + constants_ = registeredLoggers->constants(); + logger_ = registeredLoggers->get(loggerId_, false); + if (logger_ == NULL) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Logger [" << loggerId_ << "] not registered or configured yet!"); + proceed_ = false; + } +#if _ELPP_ENABLE_MUTEX + registeredLoggers->acquireLock(); + mutex_.lock(); +#endif // _ELPP_ENABLE_MUTEX + + if (proceed_) { + proceed_ = logger_->typedConfigurations_->enabled(severity_); + } + if (proceed_) { +#if (defined(_ELPP_STRICT_ROLLOUT)) + checkRollOuts(severity_, logger_); +#endif // (defined(_ELPP_STRICT_ROLLOUT)) + } + if (proceed_ && (severity_ == Level::Verbose)) { + proceed_ = (verboseLevel_ <= constants_->CURRENT_VERBOSE_LEVEL); + } + if (proceed_ && (aspect_ == Aspect::Conditional)) { + proceed_ = condition_; + } + } + + 
virtual ~Writer(void) { + if (proceed_) { + buildAndWriteLine(); + } +#if _ELPP_ENABLE_MUTEX + registeredLoggers->releaseLock(); + mutex_.unlock(); +#endif // _ELPP_ENABLE_MUTEX + } + + inline Writer& operator<<(const std::string& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(char log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(bool log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed short log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned short log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed int log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned int log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(signed long log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(unsigned long log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(float log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(double log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(char* log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const char* log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const void* log_) { + if (!proceed_) 
{ return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(long double log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_; + return *this; + } + inline Writer& operator<<(const std::wstring& log_) { + if (!proceed_) { return *this; } + return operator<<(log_.c_str()); + } + inline Writer& operator<<(const wchar_t* log_) { + if (!proceed_) { return *this; } + if (log_ == NULL) { + _ELPP_STREAM(logger_) << constants_->NULL_POINTER; + return *this; + } + std::size_t len_ = wcslen(log_) + 1; + char* buff_ = (char*)malloc(len_ + 1); +# if _ELPP_OS_UNIX || (_ELPP_OS_WINDOWS && !_ELPP_CRT_DBG_WARNINGS) + std::wcstombs(buff_, log_, len_); +# elif _ELPP_OS_WINDOWS + std::size_t convCount_ = 0; + mbstate_t mbState_; + ::memset((void*)&mbState_, 0, sizeof(mbState_)); + wcsrtombs_s(&convCount_, buff_, len_, &log_, len_, &mbState_); +# endif // _ELPP_OS_UNIX + _ELPP_STREAM(logger_) << buff_; + free(buff_); + return *this; + } +#if defined(_ELPP_STL_LOGGING) + template + inline Writer& operator<<(const std::vector& vec_) { + if (!proceed_) { return *this; } + return writeIterator(vec_.begin(), vec_.end(), vec_.size()); + } + template + inline Writer& operator<<(const std::list& list_) { + if (!proceed_) { return *this; } + return writeIterator(list_.begin(), list_.end(), list_.size()); + } + template + inline Writer& operator<<(const std::deque& deque_) { + if (!proceed_) { return *this; } + return writeIterator(deque_.begin(), deque_.end(), deque_.size()); + } + template + inline Writer& operator<<(const std::queue& queue_) { + if (!proceed_) { return *this; } + internal::workarounds::IterableQueue iterableQueue_ = + static_cast >(queue_); + return writeIterator(iterableQueue_.begin(), iterableQueue_.end(), iterableQueue_.size()); + } + template + inline Writer& operator<<(const std::stack& stack_) { + if (!proceed_) { return *this; } + internal::workarounds::IterableStack iterableStack_ = + static_cast >(stack_); 
+ return writeIterator(iterableStack_.begin(), iterableStack_.end(), iterableStack_.size()); + } + template + inline Writer& operator<<(const std::priority_queue& priorityQueue_) { + if (!proceed_) { return *this; } + internal::workarounds::IterablePriorityQueue iterablePriorityQueue_ = + static_cast >(priorityQueue_); + return writeIterator(iterablePriorityQueue_.begin(), iterablePriorityQueue_.end(), iterablePriorityQueue_.size()); + } + template + inline Writer& operator<<(const std::set& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const std::multiset& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const std::pair& pair_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(pair_.first)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(pair_.second)); + _ELPP_STREAM(logger_) << ")"; + return *this; + } + template + inline Writer& operator<<(const std::bitset& bitset_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + _ELPP_STREAM(logger_) << bitset_.to_string(); + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const std::map& map_) { + if (!proceed_) { return *this; } + return writeIterator(map_.begin(), map_.end(), map_.size()); + } + template + inline Writer& operator<<(const std::multimap& map_) { + if (!proceed_) { return *this; } + return writeIterator(map_.begin(), map_.end(), map_.size()); + } +#endif // defined(_ELPP_STL_LOGGING) +#if defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) + inline Writer& operator<<(const QString& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.toStdString(); + return *this; + } + inline Writer& operator<<(const QStringRef& log_) { + if (!proceed_) { return *this; } + return 
operator<<(log_.toString()); + } + inline Writer& operator<<(qint64 log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << QString::number(log_).toStdString(); + return *this; + } + inline Writer& operator<<(quint64 log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << QString::number(log_).toStdString(); + return *this; + } + inline Writer& operator<<(QChar log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.toLatin1(); + return *this; + } +# if (!_ELPP_QT_5) + inline Writer& operator<<(QBool log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << (bool(log_ != 0) ? "true" : "false"); + return *this; + } +# endif // (!_ELPP_QT_5) + inline Writer& operator<<(const QLatin1String& log_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << log_.latin1(); + return *this; + } + template + inline Writer& operator<<(const QList& list_) { + if (!proceed_) { return *this; } + return writeIterator(list_.begin(), list_.end(), list_.size()); + } + template + inline Writer& operator<<(const QVector& vec_) { + if (!proceed_) { return *this; } + return writeIterator(vec_.begin(), vec_.end(), vec_.size()); + } + template + inline Writer& operator<<(const QQueue& queue_) { + if (!proceed_) { return *this; } + return writeIterator(queue_.begin(), queue_.end(), queue_.size()); + } + template + inline Writer& operator<<(const QSet& set_) { + if (!proceed_) { return *this; } + return writeIterator(set_.begin(), set_.end(), set_.size()); + } + template + inline Writer& operator<<(const QPair& pair_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(pair_.first)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(pair_.second)); + _ELPP_STREAM(logger_) << ")"; + return *this; + } + template + inline Writer& operator<<(const QMap& map_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + QList keys = map_.keys(); + typename 
QList::const_iterator begin = keys.begin(); + typename QList::const_iterator end = keys.end(); + int max_ = static_cast(constants_->MAX_LOG_PER_CONTAINER); // to prevent warning + for (int index_ = 0; begin != end && index_ < max_; ++index_, ++begin) { + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(*begin)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(map_.value(*begin))); + _ELPP_STREAM(logger_) << ")"; + _ELPP_STREAM(logger_) << ((index_ < keys.size() -1) ? ", " : ""); + } + if (begin != end) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const QMultiMap& map_) { + if (!proceed_) { return *this; } + operator << (static_cast >(map_)); + return *this; + } + template + inline Writer& operator<<(const QHash& hash_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << "["; + QList keys = hash_.keys(); + typename QList::const_iterator begin = keys.begin(); + typename QList::const_iterator end = keys.end(); + int max_ = static_cast(constants_->MAX_LOG_PER_CONTAINER); // prevent type warning + for (int index_ = 0; begin != end && index_ < max_; ++index_, ++begin) { + _ELPP_STREAM(logger_) << "("; + operator << (static_cast(*begin)); + _ELPP_STREAM(logger_) << ", "; + operator << (static_cast(hash_.value(*begin))); + _ELPP_STREAM(logger_) << ")"; + _ELPP_STREAM(logger_) << ((index_ < keys.size() -1) ? 
", " : ""); + } + if (begin != end) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + template + inline Writer& operator<<(const QMultiHash& multiHash_) { + if (!proceed_) { return *this; } + operator << (static_cast >(multiHash_)); + return *this; + } + template + inline Writer& operator<<(const QLinkedList& linkedList_) { + if (!proceed_) { return *this; } + return writeIterator(linkedList_.begin(), linkedList_.end(), linkedList_.size()); + } + template + inline Writer& operator<<(const QStack& stack_) { + if (!proceed_) { return *this; } + return writeIterator(stack_.begin(), stack_.end(), stack_.size()); + } +#endif // defined(QT_CORE_LIB) && defined(_ELPP_QT_LOGGING) + template + inline Writer& operator<<(const Class& class_) { + if (!proceed_) { return *this; } + _ELPP_STREAM(logger_) << class_; + return *this; + } +private: + unsigned int aspect_; + unsigned int severity_; + const char* func_; + const char* file_; + const unsigned long int line_; + bool condition_; + int verboseLevel_; + int counter_; + Logger* logger_; + std::stringstream tempss_; + std::string currLine_; + bool proceed_; + internal::Constants* constants_; + internal::threading::Mutex mutex_; + + friend class Logger; + + template + inline Writer& writeIterator(Iterator begin_, Iterator end_, std::size_t size_) { + _ELPP_STREAM(logger_) << "["; + for (std::size_t i = 0; begin_ != end_ && i < constants_->MAX_LOG_PER_CONTAINER; ++i, ++begin_) { + operator << (*begin_); + _ELPP_STREAM(logger_) << ((i < size_ - 1) ? 
", " : ""); + } + if (begin_ != end_) { + _ELPP_STREAM(logger_) << " ..."; + } + _ELPP_STREAM(logger_) << "]"; + return *this; + } + + void buildAndWriteLine(void) { + internal::RegisteredLoggers* rl_ = registeredLoggers.pointer(); + TypedConfigurations* conf_ = logger_->typedConfigurations_; + unsigned int f_ = conf_->formatFlag(severity_); // format spec + currLine_ = conf_->logFormat(severity_); + std::string dateFormat = conf_->dateFormat(severity_); + std::string fs_; // format specifier + std::string v_; // value + // App name + if (f_ & constants_->kAppName) { + v_ = logger_->applicationName(); + fs_ = constants_->APP_NAME_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Logger ID + if (f_ & constants_->kLoggerId) { + v_ = logger_->id(); + fs_ = constants_->LOGGER_ID_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Thread ID + if (f_ & constants_->kThreadId) { + std::stringstream ss; + ss << threading::getCurrentThreadId(); + fs_ = constants_->THREAD_ID_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, ss.str(), currLine_, constants_); + } + // Date/Time + if ((f_ & constants_->kDateOnly) || (f_ & constants_->kTimeOnly) || (f_ & constants_->kDateTime)) { + v_ = internal::utilities::DateUtils::getDateTime(dateFormat, + f_, constants_, conf_->millisecondsWidth(Level::All)); + fs_ = conf_->dateFormatSpecifier(severity_); + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Function + if (f_ & constants_->kFunction) { + v_ = std::string(func_); + fs_ = constants_->FUNCTION_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Location + if (f_ & constants_->kLocation) { + tempss_ << file_ << ":" << line_; + fs_ = constants_->LOCATION_FORMAT_SPECIFIER; + 
internal::utilities::LogManipulator::updateFormatValue(fs_, tempss_.str(), currLine_, constants_); + tempss_.str(""); + } + // User + if (f_ & constants_->kUser) { + v_ = rl_->username(); + fs_ = constants_->USER_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Host + if (f_ & constants_->kHost) { + v_ = rl_->hostname(); + fs_ = constants_->HOST_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, v_, currLine_, constants_); + } + // Verbose level + if ((severity_ == Level::Verbose) && (f_ & constants_->kVerboseLevel)) { + tempss_ << verboseLevel_; + fs_ = constants_->VERBOSE_LEVEL_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, tempss_.str(), currLine_, constants_); + } + // Log message + if (f_ & constants_->kLogMessage) { + fs_ = constants_->LOG_MESSAGE_FORMAT_SPECIFIER; + internal::utilities::LogManipulator::updateFormatValue(fs_, logger_->stream()->str(), currLine_, constants_); + } + log(); + } + +#if (defined(_ELPP_STRICT_ROLLOUT)) + bool checkRollOuts(unsigned int level_, Logger* baseLogger_) { + unsigned int validLevel_ = 0; + std::string rolledOutFile = std::string(); + if (baseLogger_->typedConfigurations_->checkRollOuts(level_, validLevel_, rolledOutFile)) { + Logger* currLogger_ = NULL; + for (unsigned int i = 0; i < registeredLoggers->count(); ++i) { + currLogger_ = registeredLoggers->list().at(i); + if (currLogger_ == baseLogger_) + continue; + std::string fname = currLogger_->typedConfigurations_->filename(validLevel_); + if (fname == rolledOutFile) { + currLogger_->typedConfigurations_->forceReinitiateFile(validLevel_, fname); + } + } + return true; + } + return false; + } +#endif // (defined(_ELPP_STRICT_ROLLOUT)) + + inline void syncWritePointer(unsigned int level_, Logger* targetLogger_, std::fstream* baseStream_) { + targetLogger_->acquireLock(); + 
targetLogger_->typedConfigurations_->fileStream(level_)->seekg(baseStream_->tellg()); + targetLogger_->releaseLock(); + } + + void safeWriteToFile(unsigned int level_, Logger* logger_, const std::string& line) { // Writes line to logger_'s level_ file stream and flushes it, then syncs the stream position of every other registered logger configured with the same filename for level_ + std::string baseFilename_ = logger_->typedConfigurations_->filename(level_); + std::fstream* fstr = logger_->typedConfigurations_->fileStream(level_); + (*fstr) << line; + fstr->flush(); + Logger* currLogger_ = NULL; + for (std::size_t i = 0; i < registeredLoggers->count(); ++i) { + currLogger_ = registeredLoggers->list().at(i); + if (currLogger_ == logger_) + continue; + std::string fname = currLogger_->typedConfigurations_->filename(level_); + if (fname == baseFilename_) { + syncWritePointer(level_, currLogger_, fstr); + } + } + } + + void log(void) { // Emits currLine_ to file and/or standard output as configured for severity_, then clears the logger's stream; does nothing when the logger has no stream + if (logger_->stream_) { + if (logger_->typedConfigurations_->toFile(severity_)) { + safeWriteToFile(severity_, logger_, currLine_); + } + if (logger_->typedConfigurations_->toStandardOutput(severity_)) { + std::cout << currLine_; + } + logger_->stream_->str(""); + } + } +}; +} // namespace internal + +class VersionInfo : private internal::StaticClass { +public: + // Minimal formatted displayable information: version and release date, website, copyright (one per line) + static inline const std::string formattedInfo(void) { + std::stringstream ss; + ss << "EasyLogging++ v" << version() << " (" << releaseDate() << ")"; + ss << std::endl; + ss << website(); + ss << std::endl; + ss << copyright(); + return ss.str(); + } + + // Current version number + static inline const std::string version(void) { return std::string("8.91"); } + + // Release date of current version + static inline const std::string releaseDate(void) { return std::string("12-07-2013 1243hrs"); } + + // Original author and maintainer + static inline const std::string author(void) { return std::string("Majid Khan "); } + + // Web link + static inline const std::string website(void) { return std::string("http://icplusplus.com/tools/easylogging"); } + + // Link to source code + static inline const
std::string sourceCodeLink(void) { return std::string("https://github.com/mkhan3189/EasyLoggingPP"); } + + // Copyright information + static inline const std::string copyright(void) { return std::string("Copyright (c) 2012 - 2013 Majid Khan"); } + + // Full licence + static const std::string licence(void) { + std::stringstream ss; + ss << " This software is provided 'as-is', without any express or implied" << std::endl; + ss << " warranty. In no event will the authors be held liable for any damages" << std::endl; + ss << " arising from the use of this software." << std::endl; + ss << std::endl; + ss << " Permission is granted to anyone to use this software for any purpose," << std::endl; + ss << " including commercial applications, and to alter it and redistribute" << std::endl; + ss << " it freely, subject to the following restrictions:" << std::endl; + ss << std::endl; + ss << " 1. The origin of this software must not be misrepresented; you must" << std::endl; + ss << " not claim that you wrote the original software. If you use this" << std::endl; + ss << " software in a product, an acknowledgment in the product documentation" << std::endl; + ss << " would be appreciated but is not required." << std::endl; + ss << std::endl; + ss << " 2. Altered source versions must be plainly marked as such, and must" << std::endl; + ss << " not be misrepresented as being the original software." << std::endl; + ss << std::endl; + ss << " 3. This notice may not be removed or altered from any source" << std::endl; + ss << " distribution"; + return ss.str(); + } +}; // class VersionInfo + +//! +//! \brief Helper class to manage loggers and configurations +//! +//! A static helper class for users of library. This class contains functions related to register +//! and configure logger/s +//! +class Loggers : private internal::StaticClass { +public: + + //! + //! Get existing logger, if logger does not exist a newly created logger is returned + //! 
\param identifier_ A unique ID for logger + //! \return Pointer to easyloggingpp::Logger from logger repository + //! + static inline Logger* getLogger(const std::string& identifier_) { + return internal::registeredLoggers->get(identifier_); + } + + //! + //! Reconfigures logger with easyloggingpp::Configurations + //! \param logger_ Pointer to Logger to configure. You can use getLogger() to get pointer from logger repository + //! \param configurations_ easyloggingpp::Configurations to configure logger against + //! \return Updated pointer to Logger, or NULL when logger_ is NULL + //! + static inline Logger* reconfigureLogger(Logger* logger_, const Configurations& configurations_) { + if (!logger_) return NULL; + logger_->configure(configurations_); + return logger_; + } + + //! + //! Reconfigures logger with easyloggingpp::Configurations + //! \param identifier_ Logger ID, resolved through getLogger() + //! \param configurations_ easyloggingpp::Configurations to configure logger against (taken by const reference, like the Logger* overload) + //! \return Updated pointer to Logger + //! + static inline Logger* reconfigureLogger(const std::string& identifier_, const Configurations& configurations_) { + Logger* logger_ = Loggers::getLogger(identifier_); + Loggers::reconfigureLogger(logger_, configurations_); + return logger_; + } + + //! + //! Reconfigures all loggers available in logger repository + //! \param configurations_ easyloggingpp::Configurations to configure every registered logger against + //! + static inline void reconfigureAllLoggers(const Configurations& configurations_) { + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + Logger* l = internal::registeredLoggers->at(i); + Loggers::reconfigureLogger(l, configurations_); + } + } + + //! + //! Reconfigures all loggers for single configuration. + //! \param configurationType_ Configuration type to update. Use easyloggingpp::ConfigurationType to prevent confusion + //! \param value_ Value to set. Values have to be std::string; For boolean values use "true", "false", for any integral values + //! use them in quotes.
They will be parsed when configuring + //! + static inline void reconfigureAllLoggers(unsigned int configurationType_, const std::string& value_) { + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + Logger* l = internal::registeredLoggers->at(i); + l->configurations().setAll(configurationType_, value_); + l->reconfigure(); + } + } + + //! + //! Sets default configurations. This configuration is used for future loggers. + //! \param configurations + //! \param configureExistingLoggers If true, all loggers are updated against provided configuration otherwise only future loggers + //! will be updated and all the existing loggers will use configurations that have been set previously. + //! + static inline void setDefaultConfigurations(Configurations& configurations, bool configureExistingLoggers = false) { + internal::registeredLoggers->setDefaultConfigurations(configurations); + if (configureExistingLoggers) { + Loggers::reconfigureAllLoggers(configurations); + } + } + + //! + //! Sets application arguments and uses them where needed. Example use is when application is run with '--v=X' or '-v', verbose logging + //! turns on + //! \param argc Argument count + //! \param argv Argument value array pointer + //! + static inline void setApplicationArguments(int argc, char** argv) { + internal::registeredLoggers->setApplicationArguments(argc, argv); + } + + //! + //! Sets application arguments and uses them where needed. Example use is when application is run with '--v=X' or '-v', verbose logging + //! turns on + //! \param argc + //! \param argv + //! + static inline void setApplicationArguments(int argc, const char** argv) { + internal::registeredLoggers->setApplicationArguments(argc, argv); + } + + //! + //! Disables all loggers + //! + static inline void disableAll(void) { + reconfigureAllLoggers(ConfigurationType::Enabled, "false"); + } + + //! + //! Enable all loggers + //! 
+ static inline void enableAll(void) { + reconfigureAllLoggers(ConfigurationType::Enabled, "true"); + } + + //! + //! Reconfigure all loggers to write to single log file + //! \param logFilename_ Full path to log file + //! + static inline void setFilename(const std::string& logFilename_) { + reconfigureAllLoggers(ConfigurationType::Filename, logFilename_); + } + + //! + //! Reconfigure specified logger to write to specified log file + //! \param logger_ Pointer to logger (nothing is done when it is NULL). You may use Loggers::getLogger(id) to get pointer + //! \param logFilename_ Full path to log file + //! + static inline void setFilename(Logger* logger_, const std::string& logFilename_) { + if (!logger_) return; + logger_->configurations().setAll(ConfigurationType::Filename, logFilename_); + logger_->reconfigure(); + } + + //! + //! Determines whether or not performance tracking is enabled on the 'performance' logger + //! \return True if enabled, false otherwise + //! + static inline bool performanceTrackingEnabled(void) { + return performanceLogger()->typedConfigurations_->performanceTracking(); + } + + //! + //! Disables performance tracking. + //! Performance tracking is logged using 'performance' logger. + //! + static inline void disablePerformanceTracking(void) { + Logger* l = Loggers::performanceLogger(); + l->configurations().setAll(ConfigurationType::PerformanceTracking, "false"); + l->reconfigure(); + } + + //! + //! Enable performance tracking + //! Performance tracking is logged using 'performance' logger. + //! + static inline void enablePerformanceTracking(void) { + Logger* l = Loggers::performanceLogger(); + l->configurations().setAll(ConfigurationType::PerformanceTracking, "true"); + l->reconfigure(); + } + + //! + //! Iterates through logger repository and puts IDs into listOfIds + //! \param listOfIds (Passed by reference) Vector to fill up + //!
+ static inline void getAllLogIdentifiers(std::vector<std::string>& listOfIds) { + listOfIds.clear(); + for (std::size_t i = 0; i < internal::registeredLoggers->count(); ++i) { + listOfIds.push_back(internal::registeredLoggers->at(i)->id()); + } + } + + //! + //! \return Returns one of default loggers 'trivial' logger + //! + static inline Logger* trivialLogger(void) { + return Loggers::getLogger("trivial"); + } + + //! + //! \return Returns one of default loggers 'business' logger + //! + static inline Logger* businessLogger(void) { + return Loggers::getLogger("business"); + } + + //! + //! \return Returns one of default loggers 'security' logger + //! + static inline Logger* securityLogger(void) { + return Loggers::getLogger("security"); + } + + //! + //! \return Returns one of default loggers 'performance' logger + //! + static inline Logger* performanceLogger(void) { + return Loggers::getLogger("performance"); + } + + //! + //! Static class that contains static helper functions used to read configurations + //!
+ class ConfigurationsReader : private internal::StaticClass { + public: + static inline bool enabled(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->enabled(level_); + } + + static inline bool enabled(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->enabled(level_); + } + + static inline bool toFile(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->toFile(level_); + } + + static inline bool toFile(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->toFile(level_); + } + + static inline const std::string& filename(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->filename(level_); + } + + static inline const std::string& filename(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->filename(level_); + } + + static inline bool toStandardOutput(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->toStandardOutput(level_); + } + + static inline bool toStandardOutput(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->toStandardOutput(level_); + } + + static inline const std::string& logFormat(Logger* logger_, unsigned int 
level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->logFormat(level_); + } + + static inline const std::string& logFormat(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->logFormat(level_); + } + + static inline int millisecondsWidth(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->millisecondsWidth(level_); + } + + static inline int millisecondsWidth(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->millisecondsWidth(level_); + } + + static inline bool performanceTracking(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->performanceTracking(level_); + } + + static inline bool performanceTracking(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->performanceTracking(level_); + } + + static inline std::size_t logRollOutSize(Logger* logger_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(logger_ != NULL, "Invalid Logger provided - nullptr"); + return constConf(logger_)->rollOutSize(level_); + } + + static inline std::size_t logRollOutSize(internal::TypedConfigurations* conf_, unsigned int level_ = Level::All) { + __EASYLOGGINGPP_ASSERT(conf_ != NULL, "Invalid TypedConfigurations provided - nullptr"); + return conf_->rollOutSize(level_); + } + + private: + static inline internal::TypedConfigurations* constConf(Logger* logger_) { + return logger_->typedConfigurations_; + 
} + }; // class ConfigurationsReader +private: + internal::threading::Mutex mutex_; +}; +// +// Helping Macros +// +// Performance tracking macros +#if ((!defined(_DISABLE_PERFORMANCE_TRACKING)) || (!defined(_DISABLE_INFO_LOGS))) +# if _ELPP_OS_UNIX +# define _ELPP_GET_CURR_TIME(tm) gettimeofday(tm, NULL); +# elif _ELPP_OS_WINDOWS +# define _ELPP_GET_CURR_TIME(tm) easyloggingpp::internal::utilities::DateUtils::gettimeofday(tm); +# endif +# define START_FUNCTION_LOG "Executing [" << __func__ << "]" +# define TIME_OUTPUT "Executed [" << __func__ << "] in [" << \ + easyloggingpp::internal::utilities::DateUtils::formatMilliSeconds( \ + easyloggingpp::internal::utilities::DateUtils::getTimeDifference(functionEndTime, functionStartTime)) << "]" +# define FUNC_SUB_COMMON_START { timeval functionStartTime, functionEndTime; _ELPP_GET_CURR_TIME(&functionStartTime) +# define WRITE_FUNC_PERFORMANCE _ELPP_GET_CURR_TIME(&functionEndTime); \ + if (easyloggingpp::Loggers::performanceTrackingEnabled()) { PINFO << TIME_OUTPUT; } +# define FUNC_SUB_COMMON_END WRITE_FUNC_PERFORMANCE; +# define SUB(FUNCTION_NAME,PARAMS) void FUNCTION_NAME PARAMS FUNC_SUB_COMMON_START +# define END_SUB FUNC_SUB_COMMON_END } +# define FUNC(RETURNING_TYPE,FUNCTION_NAME,PARAMS) RETURNING_TYPE FUNCTION_NAME PARAMS FUNC_SUB_COMMON_START +# define RETURN(return_value) FUNC_SUB_COMMON_END return return_value; +# define END_FUNC(return_value) RETURN(return_value) } +# define MAIN(argc, argv) FUNC(int, main, (argc, argv)) +# define END_MAIN(return_value) FUNC_SUB_COMMON_END; return return_value; } +# define RETURN_MAIN(exit_status) return exit_status; +#else +# define SUB(FUNCTION_NAME,PARAMS) void FUNCTION_NAME PARAMS { +# define END_SUB } +# define FUNC(RETURNING_TYPE,FUNCTION_NAME,PARAMS) RETURNING_TYPE FUNCTION_NAME PARAMS { +# define END_FUNC(x) return x; } +# define RETURN(expr) return expr; +# define MAIN(argc, argv) FUNC(int, main, (argc, argv)) +# define END_MAIN(x) return x; } +# define 
RETURN_MAIN(exit_status) return exit_status; +#endif // ((!defined(_DISABLE_PERFORMANCE_TRACKING)) || (!defined(_DISABLE_INFO_LOGS))) + +#define _ELPP_LOG_WRITER(_logger, _level) easyloggingpp::internal::Writer(\ + _logger, easyloggingpp::internal::Aspect::Normal, _level, __func__, __FILE__, __LINE__) +#define _ELPP_LOG_WRITER_COND(_c, _logger, _level) if (_c) easyloggingpp::internal::Writer(\ + _logger, easyloggingpp::internal::Aspect::Conditional, _level, __func__, __FILE__, __LINE__, _c) +#define _ELPP_LOG_WRITER_N(_n, _logger, _level) if (easyloggingpp::internal::registeredLoggers->validateCounter(\ + __FILE__, __LINE__, _n)) easyloggingpp::internal::Writer(_logger, easyloggingpp::internal::Aspect::Interval,\ + _level, __func__, __FILE__, __LINE__, true, 0, _n) +#undef VLOG_IS_ON +#define VLOG_IS_ON(verboseLevel) verboseLevel <= easyloggingpp::internal::registeredLoggers->constants()->CURRENT_VERBOSE_LEVEL +// Undef levels to support LOG(LEVEL) +#undef INFO +#undef DEBUG +#undef ERROR +#undef FATAL +#undef QA +#undef TRACE +#undef VERBOSE +// +// Custom loggers - macro names with levels - requires loggerId +// +// Undef existing +#undef CINFO +#undef CWARNING +#undef CDEBUG +#undef CERROR +#undef CFATAL +#undef ERROR +#undef CQA +#undef CTRACE +#undef CVERBOSE +#undef CINFO_IF +#undef CWARNING_IF +#undef CDEBUG_IF +#undef CERROR_IF +#undef CFATAL_IF +#undef ERROR_IF +#undef CQA_IF +#undef CTRACE_IF +#undef CVERBOSE_IF +#undef CINFO_EVERY_N +#undef CWARNING_EVERY_N +#undef CDEBUG_EVERY_N +#undef CERROR_EVERY_N +#undef CFATAL_EVERY_N +#undef ERROR_EVERY_N +#undef CQA_EVERY_N +#undef CTRACE_EVERY_N +#undef CVERBOSE_EVERY_N +// Normal logs +#if _ELPP_INFO_LOG +# define CINFO(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Info) +#else +# define CINFO(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Warning) +#else +# define 
CWARNING(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define CDEBUG(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Error) +#else +# define CERROR(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::QA) +#else +# define CQA(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE(loggerId) _ELPP_LOG_WRITER(loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE(loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE(vlevel_, loggerId) easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Normal, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, true, vlevel_) +#else +# define CVERBOSE(vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// Conditional logs +#if _ELPP_INFO_LOG +# define CINFO_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Info) +#else +# define CINFO_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Warning) +#else +# define CWARNING_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define 
CDEBUG_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Error) +#else +# define CERROR_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::QA) +#else +# define CQA_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE_IF(condition_, loggerId) _ELPP_LOG_WRITER_COND(condition_, loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE_IF(condition_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE_IF(condition_, vlevel_, loggerId) if (condition_) easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Conditional, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, condition_, vlevel_) +#else +# define CVERBOSE_IF(condition_, vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// Interval logs +#if _ELPP_INFO_LOG +# define CINFO_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Info) +#else +# define CINFO_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_INFO_LOG +#if _ELPP_WARNING_LOG +# define CWARNING_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, 
easyloggingpp::Level::Warning) +#else +# define CWARNING_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_WARNING_LOG +#if _ELPP_DEBUG_LOG +# define CDEBUG_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Debug) +#else +# define CDEBUG_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_DEBUG_LOG +#if _ELPP_ERROR_LOG +# define CERROR_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Error) +#else +# define CERROR_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_ERROR_LOG +#if _ELPP_FATAL_LOG +# define CFATAL_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Fatal) +#else +# define CFATAL_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_FATAL_LOG +#if _ELPP_QA_LOG +# define CQA_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::QA) +#else +# define CQA_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_QA_LOG +#if _ELPP_TRACE_LOG +# define CTRACE_EVERY_N(interval_, loggerId) _ELPP_LOG_WRITER_N(interval_, loggerId, easyloggingpp::Level::Trace) +#else +# define CTRACE_EVERY_N(interval_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_TRACE_LOG +#if _ELPP_VERBOSE_LOG +# define CVERBOSE_EVERY_N(interval_, vlevel_, loggerId) if (easyloggingpp::internal::registeredLoggers->validateCounter(__FILE__, __LINE__, interval_)) \ + easyloggingpp::internal::Writer(loggerId, easyloggingpp::internal::Aspect::Interval, \ + easyloggingpp::Level::Verbose, __func__, __FILE__, __LINE__, true, vlevel_, interval_) +#else +# define CVERBOSE_EVERY_N(interval_, vlevel_, loggerId) easyloggingpp::internal::NullWriter() +#endif // _ELPP_VERBOSE_LOG +// +// Custom Loggers - Requires (level, loggerId) +// +// undef existing +#undef CLOG +#undef 
CLOG_VERBOSE +#undef CVLOG +#undef CLOG_IF +#undef CLOG_VERBOSE_IF +#undef CVLOG_IF +#undef CLOG_EVERY_N +#undef CLOG_VERBOSE_EVERY_N +#undef CVLOG_EVERY_N +// Normal logs +#define CLOG(LEVEL, loggerId) C##LEVEL(loggerId) +#define CLOG_VERBOSE(vlevel, loggerId) CVERBOSE(vlevel, loggerId) +#define CVLOG(vlevel, loggerId) CVERBOSE(vlevel, loggerId) +// Conditional logs +#define CLOG_IF(condition, LEVEL, loggerId) C##LEVEL##_IF(condition, loggerId) +#define CLOG_VERBOSE_IF(condition, vlevel, loggerId) CVERBOSE_IF(condition, vlevel, loggerId) +#define CVLOG_IF(condition, vlevel, loggerId) CVERBOSE_IF(condition, vlevel, loggerId) +// Interval logs +#define CLOG_EVERY_N(n, LEVEL, loggerId) C##LEVEL##_EVERY_N(n, loggerId) +#define CLOG_VERBOSE_EVERY_N(n, vlevel, loggerId) CVERBOSE_EVERY_N(n, vlevel, loggerId) +#define CVLOG_EVERY_N(n, vlevel, loggerId) CVERBOSE_EVERY_N(n, vlevel, loggerId) +// +// Default Loggers macro using CLOG(), CLOG_VERBOSE() and CVLOG() macros +// +// undef existing +#undef LOG +#undef LOG_VERBOSE +#undef VLOG +#undef LOG_IF +#undef LOG_VERBOSE_IF +#undef VLOG_IF +#undef LOG_EVERY_N +#undef LOG_VERBOSE_EVERY_N +#undef VLOG_EVERY_N +// Normal logs +#define LOG(LEVEL) CLOG(LEVEL, "trivial") +#define LOG_VERBOSE(vlevel) CLOG_VERBOSE(vlevel, "trivial") +#define VLOG(vlevel) CVLOG(vlevel, "trivial") +// Conditional logs +#define LOG_IF(condition, LEVEL) CLOG_IF(condition, LEVEL, "trivial") +#define LOG_VERBOSE_IF(condition, vlevel) CLOG_VERBOSE_IF(condition, vlevel, "trivial") +#define VLOG_IF(condition, vlevel) CVLOG_IF(condition, vlevel, "trivial") +// Interval logs +#define LOG_EVERY_N(n, LEVEL) CLOG_EVERY_N(n, LEVEL, "trivial") +#define LOG_VERBOSE_EVERY_N(n, vlevel) CLOG_VERBOSE_EVERY_N(n, vlevel, "trivial") +#define VLOG_EVERY_N(n, vlevel) CVLOG_EVERY_N(n, vlevel, "trivial") +// +// Default Loggers macro using C##LEVEL("trivial") +// +// undef existing +#undef LINFO +#undef LWARNING +#undef LDEBUG +#undef LERROR +#undef LFATAL +#undef LQA +#undef 
LTRACE +#undef LVERBOSE +#undef LINFO_IF +#undef LWARNING_IF +#undef LDEBUG_IF +#undef LERROR_IF +#undef LFATAL_IF +#undef LQA_IF +#undef LTRACE_IF +#undef LVERBOSE_IF +#undef LINFO_EVERY_N +#undef LWARNING_EVERY_N +#undef LDEBUG_EVERY_N +#undef LERROR_EVERY_N +#undef LFATAL_EVERY_N +#undef LQA_EVERY_N +#undef LTRACE_EVERY_N +#undef LVERBOSE_EVERY_N +// Normal logs +#define LINFO CINFO("trivial") +#define LWARNING CWARNING("trivial") +#define LDEBUG CDEBUG("trivial") +#define LERROR CERROR("trivial") +#define LFATAL CFATAL("trivial") +#define LQA CQA("trivial") +#define LTRACE CTRACE("trivial") +#define LVERBOSE(level) CVERBOSE(level, "trivial") +// Conditional logs +#define LINFO_IF(condition) CINFO_IF(condition, "trivial") +#define LWARNING_IF(condition) CWARNING_IF(condition, "trivial") +#define LDEBUG_IF(condition) CDEBUG_IF(condition, "trivial") +#define LERROR_IF(condition) CERROR_IF(condition, "trivial") +#define LFATAL_IF(condition) CFATAL_IF(condition, "trivial") +#define LQA_IF(condition) CQA_IF(condition, "trivial") +#define LTRACE_IF(condition) CTRACE_IF(condition, "trivial") +#define LVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "trivial") +// Interval logs +#define LINFO_EVERY_N(n) CINFO_EVERY_N(n, "trivial") +#define LWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "trivial") +#define LDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "trivial") +#define LERROR_EVERY_N(n) CERROR_EVERY_N(n, "trivial") +#define LFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "trivial") +#define LQA_EVERY_N(n) CQA_EVERY_N(n, "trivial") +#define LTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "trivial") +#define LVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "trivial") +// +// Default Loggers macro using C##LEVEL("business") +// +// undef existing +#undef BINFO +#undef BWARNING +#undef BDEBUG +#undef BERROR +#undef BFATAL +#undef BQA +#undef BTRACE +#undef BVERBOSE +#undef BINFO_IF +#undef BWARNING_IF +#undef BDEBUG_IF +#undef BERROR_IF +#undef BFATAL_IF +#undef BQA_IF +#undef BTRACE_IF +#undef 
BVERBOSE_IF +#undef BINFO_EVERY_N +#undef BWARNING_EVERY_N +#undef BDEBUG_EVERY_N +#undef BERROR_EVERY_N +#undef BFATAL_EVERY_N +#undef BQA_EVERY_N +#undef BTRACE_EVERY_N +#undef BVERBOSE_EVERY_N +// Normal logs +#define BINFO CINFO("business") +#define BWARNING CWARNING("business") +#define BDEBUG CDEBUG("business") +#define BERROR CERROR("business") +#define BFATAL CFATAL("business") +#define BQA CQA("business") +#define BTRACE CTRACE("business") +#define BVERBOSE(level) CVERBOSE(level, "business") +// Conditional logs +#define BINFO_IF(condition) CINFO_IF(condition, "business") +#define BWARNING_IF(condition) CWARNING_IF(condition, "business") +#define BDEBUG_IF(condition) CDEBUG_IF(condition, "business") +#define BERROR_IF(condition) CERROR_IF(condition, "business") +#define BFATAL_IF(condition) CFATAL_IF(condition, "business") +#define BQA_IF(condition) CQA_IF(condition, "business") +#define BTRACE_IF(condition) CTRACE_IF(condition, "business") +#define BVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "business") +// Interval logs +#define BINFO_EVERY_N(n) CINFO_EVERY_N(n, "business") +#define BWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "business") +#define BDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "business") +#define BERROR_EVERY_N(n) CERROR_EVERY_N(n, "business") +#define BFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "business") +#define BQA_EVERY_N(n) CQA_EVERY_N(n, "business") +#define BTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "business") +#define BVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "business") +// +// Default Loggers macro using C##LEVEL("security") +// +// undef existing +#undef SINFO +#undef SWARNING +#undef SDEBUG +#undef SERROR +#undef SFATAL +#undef SQA +#undef STRACE +#undef SVERBOSE +#undef SINFO_IF +#undef SWARNING_IF +#undef SDEBUG_IF +#undef SERROR_IF +#undef SFATAL_IF +#undef SQA_IF +#undef STRACE_IF +#undef SVERBOSE_IF +#undef SINFO_EVERY_N +#undef SWARNING_EVERY_N +#undef SDEBUG_EVERY_N +#undef SERROR_EVERY_N +#undef SFATAL_EVERY_N 
+#undef SQA_EVERY_N +#undef STRACE_EVERY_N +#undef SVERBOSE_EVERY_N +// Normal logs +#define SINFO CINFO("security") +#define SWARNING CWARNING("security") +#define SDEBUG CDEBUG("security") +#define SERROR CERROR("security") +#define SFATAL CFATAL("security") +#define SQA CQA("security") +#define STRACE CTRACE("security") +#define SVERBOSE(level) CVERBOSE(level, "security") +// Conditional logs +#define SINFO_IF(condition) CINFO_IF(condition, "security") +#define SWARNING_IF(condition) CWARNING_IF(condition, "security") +#define SDEBUG_IF(condition) CDEBUG_IF(condition, "security") +#define SERROR_IF(condition) CERROR_IF(condition, "security") +#define SFATAL_IF(condition) CFATAL_IF(condition, "security") +#define SQA_IF(condition) CQA_IF(condition, "security") +#define STRACE_IF(condition) CTRACE_IF(condition, "security") +#define SVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "security") +// Interval logs +#define SINFO_EVERY_N(n) CINFO_EVERY_N(n, "security") +#define SWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "security") +#define SDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "security") +#define SERROR_EVERY_N(n) CERROR_EVERY_N(n, "security") +#define SFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "security") +#define SQA_EVERY_N(n) CQA_EVERY_N(n, "security") +#define STRACE_EVERY_N(n) CTRACE_EVERY_N(n, "security") +#define SVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "security") +// +// Default Loggers macro using C##LEVEL("performance") +// +// undef existing +#undef PINFO +#undef PWARNING +#undef PDEBUG +#undef PERROR +#undef PFATAL +#undef PQA +#undef PTRACE +#undef PVERBOSE +#undef PINFO_IF +#undef PWARNING_IF +#undef PDEBUG_IF +#undef PERROR_IF +#undef PFATAL_IF +#undef PQA_IF +#undef PTRACE_IF +#undef PVERBOSE_IF +#undef PINFO_EVERY_N +#undef PWARNING_EVERY_N +#undef PDEBUG_EVERY_N +#undef PERROR_EVERY_N +#undef PFATAL_EVERY_N +#undef PQA_EVERY_N +#undef PTRACE_EVERY_N +#undef PVERBOSE_EVERY_N +// Normal logs +#define PINFO CINFO("performance") +#define
PWARNING CWARNING("performance") +#define PDEBUG CDEBUG("performance") +#define PERROR CERROR("performance") +#define PFATAL CFATAL("performance") +#define PQA CQA("performance") +#define PTRACE CTRACE("performance") +#define PVERBOSE(level) CVERBOSE(level, "performance") +// Conditional logs +#define PINFO_IF(condition) CINFO_IF(condition, "performance") +#define PWARNING_IF(condition) CWARNING_IF(condition, "performance") +#define PDEBUG_IF(condition) CDEBUG_IF(condition, "performance") +#define PERROR_IF(condition) CERROR_IF(condition, "performance") +#define PFATAL_IF(condition) CFATAL_IF(condition, "performance") +#define PQA_IF(condition) CQA_IF(condition, "performance") +#define PTRACE_IF(condition) CQA_IF(condition, "performance") +#define PVERBOSE_IF(condition, level) CVERBOSE_IF(condition, level, "performance") +// Interval logs +#define PINFO_EVERY_N(n) CINFO_EVERY_N(n, "performance") +#define PWARNING_EVERY_N(n) CWARNING_EVERY_N(n, "performance") +#define PDEBUG_EVERY_N(n) CDEBUG_EVERY_N(n, "performance") +#define PERROR_EVERY_N(n) CERROR_EVERY_N(n, "performance") +#define PFATAL_EVERY_N(n) CFATAL_EVERY_N(n, "performance") +#define PQA_EVERY_N(n) CQA_EVERY_N(n, "performance") +#define PTRACE_EVERY_N(n) CTRACE_EVERY_N(n, "performance") +#define PVERBOSE_EVERY_N(n, level) CVERBOSE_EVERY_N(n, level, "performance") +// Undefine macros that are not needed anymore +#undef _ELPP_ASSEMBLY_SUPPORTED +#undef _ELPP_STREAM +#undef _ELPP_MUTEX_LOCK_GNU_ASM +#undef _ELPP_MUTEX_UNLOCK_GNU_ASM +#undef _ELPP_ENABLE_MUTEX +#undef _ENABLE_EASYLOGGING +#undef __EASYLOGGINGPP_SUPPRESS_UNSED +#undef _ELPP_DEBUG_LOG +#undef _ELPP_INFO_LOG +#undef _ELPP_WARNING_LOG +#undef _ELPP_ERROR_LOG +#undef _ELPP_FATAL_LOG +#undef _ELPP_QA_LOG +#undef _ELPP_VERBOSE_LOG +#undef _ELPP_TRACE_LOG +#undef _INITIALIZE_EASYLOGGINGPP +#undef _START_EASYLOGGINGPP +#undef _ELPP_COUNTER +#undef _ELPP_COUNTER_POSITION +#define _INITIALIZE_EASYLOGGINGPP \ + namespace easyloggingpp { \ + namespace 
// ---- error.h ---------------------------------------------------------------
/*
 * File: error.h
 * -------------
 * This file defines the ErrorException class and the
 * error function.
 */

#ifndef _error_h
#define _error_h

#include <exception>
#include <string>

/*
 * Class: ErrorException
 * ---------------------
 * This exception is thrown by calls to the error
 * function, which makes it possible for clients to respond to error
 * conditions.  Typical code for catching errors looks like this:
 *
 *    try {
 *       . . . code in which an error might occur . . .
 *    } catch (ErrorException & ex) {
 *       . . . code to handle the error condition . . .
 *    }
 *
 * If an ErrorException is thrown at any point in the
 * range of the try (including in functions called from
 * that code), control will jump immediately to the error handler.
 */

class ErrorException : public std::exception {
public:
    /* Creates an exception carrying msg. */
    ErrorException(std::string msg);
    virtual ~ErrorException() throw ();

    /* Returns the message exactly as it was passed to the constructor. */
    virtual std::string getMessage();

    /*
     * Returns "Error: " followed by the message. The pointer refers to
     * storage owned by this object and stays valid for its lifetime.
     */
    virtual const char *what() const throw ();

private:
    std::string msg;      /* raw message, returned by getMessage()      */
    std::string whatMsg;  /* "Error: " + msg, backing store for what()  */
};

/*
 * Function: error
 * Usage: error(msg);
 * ------------------
 * Signals an error condition in a program by throwing an
 * ErrorException with the specified message.
 */

void error(std::string str);

#endif

// ---- error.cpp -------------------------------------------------------------

// The what() text is built once here so that what() can hand out a pointer
// into a member string instead of into a temporary.
ErrorException::ErrorException(std::string msg)
    : msg(msg), whatMsg("Error: " + msg) {}

ErrorException::~ErrorException() throw() {}

std::string ErrorException::getMessage() {
    return msg;
}

const char *ErrorException::what() const throw () {
    // Previously this returned ("Error: " + msg).c_str(): the temporary string
    // was destroyed at the end of the statement, leaving callers with a
    // dangling pointer.
    return whatMsg.c_str();
}

void error(std::string str) {
    throw ErrorException(str);
}
BAMFILE\n" +"Find deletions from records in BAMFILE\n" +"\n" +" --help display this help and exit\n" +" -v, --verbose display verbose output\n" +" -r, --reffile=FILE read the reference sequence from FILE\n" +" -o, --outfile=FILE write the deletion calls to FILE (default: BAMFILE.calls)\n" +" -e, --error-rate=F the maximum error rate allowed between two sequences to consider them overlapped (default: 0.04)\n" +" -m, --min-overlap=LEN minimum overlap required between two reads (default: 12)\n" +" -q, --mapping-qual=MAPQ minimum mapping quality of a read (default: 1)\n" +" -n, --allowed-num=SIZE a soft-clip is defined as valid, when the clipped part is not less than SIZE (default: 5)\n" +"\nThe following two option must appear together (if ommitted, attempt ot learn the mean and the standard deviation of insert size):\n" +" -i, --insert-mean=N the mean of insert size\n" +" --enhanced-mode enable the enhanced mode, in which reads of type 2 are considered besides type 1\n" +" -s, --insert-sd=N the standard deviation of insert size\n" +"\nReport bugs to " PROGRAM_BUGREPORT "\n\n"; + +namespace opt +{ + static unsigned int verbose; + static std::string bamFile; + static std::string refFile; + static std::string outFile; + static double errorRate = 0.04; + static int minOverlap = DEFAULT_MIN_OVERLAP; + static int minMapQual = DEFAULT_MIN_MAPQUAL; + static int allowedNum = 12; + static int mode = 0; + + static bool bLearnInsert = true; + static int insertMean; + static int insertSd; +} + +static const char* shortopts = "o:q:r:e:m:n:i:s:v"; + +enum { OPT_HELP = 1, OPT_VERSION, OPT_ENHANCED_MODE }; + +static const struct option longopts[] = { + { "verbose", no_argument, NULL, 'v' }, + { "min-overlap", required_argument, NULL, 'm' }, + { "mapping-qual", required_argument, NULL, 'q' }, + { "allowed-num", required_argument, NULL, 'n' }, + { "reffile", required_argument, NULL, 'r'}, + { "outfile", required_argument, NULL, 'o' }, + { "error-rate", required_argument, NULL, 'e' }, + 
{ "insert-mean", required_argument, NULL, 'i' }, + { "insert-sd", required_argument, NULL, 's' }, + { "help", no_argument, NULL, OPT_HELP }, + { "version", no_argument, NULL, OPT_VERSION }, + { "enhanced-mode", no_argument, NULL, OPT_ENHANCED_MODE }, + { NULL, 0, NULL, 0 } +}; + +void parseOptions(int argc, char** argv); +void output(const std::string& filename, const std::vector& dels); + +_INITIALIZE_EASYLOGGINGPP + +// +// Main +// +int main(int argc, char *argv[]) { + +// std::string s1 = +// "TCACTTGAACCCAGGAGGCAGAGGTTCCAGTGAGCTGAGATCATGCCACTGCACTCCAGCCTGGGCAACAGAGCGAGGCTCCATCTCA" +// "TCTCCTCTTTCCCTCCTGCCAACTGAAAATGTTTGCTTCGCTCTGTGAAAATAATGTTAATAAAAATGTCTATATACACATATAAAATGTCACTTATAAAAGATGTTAACTATAAAATAG" +// "CAGCTAGGGATAAGAGTTCTTAAGTCAAATCCTTAGAATCAATTAATTAGCTCTCCCAAACAAAACAAAACAAAACAAAAAAAGGCCATGGCCGAGCATGGTGGCTGACACCTGTAATCC" +// "CAGCACTTTAGGAGACTGAGGTGGGTAGACGGAGGTCAGGAGTTCAAGACCAGCGTGGCCAACATAGTGAAACCCCGTCTCTACTAAAAATACAAAAAAATTTGCCGGGCATAGAGGTGC" +// "ACACCTGTAATCCCAGCTACTTGGGAGGCTGAGGCACAAGAATCGCTTGAACCCAGGAGGTGGAAGTTGCAGCAACCTGAGGTTGCACCACTGCACTCCAGCCTGGGCAACAGAGCGAGA" +// "CTCCATCTCAAATAAATAAACAAACAAACAAAAACAAACTAGCTCTGCCAGTTGCTACCTTGAGAAAGTCACTTAACTTTTCTAAACCTCTTTTCCACCTATAAAAGTTAGTAATTGCTT" +// "AATTCACATATTGTGAGAATAAGAGAAATACTCTATATGGTACACTCATGACAATGACTAGGACACACTAAATACCCGTACTCAATTCAACAATGATCAGCATTATTACTGATTTACTAA" +// "TCTGCACTAATAAGCACAATAAGCTCTAACTAATAAGCAAAATAATTACTAACAATTATTTTAAATACTGTTAGTGGTACATACCTTATAATCTATAAAAGATTCTTGTTCCTGTTGACA" +// "CTGGGAAAGATAATCCTTCATATCATTCAATTCATC"; +// std::string s2 = "TCACTTGAACCCAGGAGGCAGAGGTTCCAGTGAGCTGAGATCATGCCACTGCACTCCAGCCTGGGCAACAGAGCGAGGCTCCATCTCAAATAAATAATCAA"; + +// std::string s1 = "ACGGGGACT"; +// std::string s2 = "ACGTTACT"; +// SequenceOverlap result = Overlapper::ageAlignSuffix(s1, s2, ScoreParam(1, -1, 2, 4)); +// LINFO << result; +// return 0; + + parseOptions(argc, argv); + + if (opt::bLearnInsert) { + std::cout << "Estimate the mean and standard deviation of insert size:" << std::endl; + 
BamStatCalculator calc(opt::bamFile); + opt::insertMean = calc.getInsertMean(); + opt::insertSd = calc.getInsertSd(); + std::cout << "Mean: " << opt::insertMean << std::endl; + std::cout << "Sd: " << opt::insertSd << std::endl; + } + +// Parameters params = { opt::allowedNum, +// opt::mode, +// opt::minOverlap, +// 1.0f - opt::errorRate, +// opt::insertMean, +// opt::insertSd }; + + ClipReader creader(opt::bamFile, opt::allowedNum, opt::mode, opt::minMapQual, opt::insertMean + DEFAULT_SD_CUTOFF * opt::insertSd); + + BamTools::BamReader bamReader; + if (!bamReader.Open(opt::bamFile)) + error("Could not open the input BAM file."); + if (!bamReader.LocateIndex()) + error("Could not locate the index file"); + + FaidxWrapper faidx(opt::refFile); + + int insLength = opt::insertMean + 3 * opt::insertSd; + double identityRate = 1.0f - opt::errorRate; + + std::vector deletions; + +// Timer* pTimer = new Timer("Preprocessing split reads"); + Timer* pTimer = new Timer("Calling deletions"); + AbstractClip *pClip; +// std::vector clips; + while ((pClip = creader.nextClip())) { +// clips.push_back(pClip); + try { + auto del = pClip->call(bamReader, faidx, insLength, opt::minOverlap, identityRate, opt::minMapQual); + deletions.push_back(del); + } catch (ErrorException& ex) { + // std::cout << ex.getMessage() << std::endl; + } + } + delete pTimer; + +// std::cout << "# Soft-clipping reads: " << clips.size() << std::endl; + +/* + sort(clips.begin(), clips.end(), + [](AbstractClip* pc1, AbstractClip* pc2){ return pc1->getClipPosition() < pc2->getClipPosition(); }); + + size_t k = 50; + for (size_t i = 0; i < clips.size() - 1; ++i) { + for (size_t j = i + 1; j < std::min(i + k, clips.size()); ++j) { + if (clips[i]->hasConflictWith(clips[j])) { + clips[i]->setConflictFlag(true); + clips[j]->setConflictFlag(true); + } + } + } + + std::cout << "#Reads with soft-clipping (original): " << clips.size() << std::endl; + + std::vector newClips; + std::copy_if(clips.begin(), clips.end(), 
back_inserter(newClips), + [](AbstractClip* pc){ return !pc->getConflictFlag(); }); + + std::cout << "#Reads with soft-clipping after resolving conflicts: " << newClips.size() << std::endl; + + std::vector > clipClusters; + cluster(clips, clipClusters, + [](AbstractClip* pc1, AbstractClip* pc2){ return pc1->getClipPosition() == pc2->getClipPosition(); }); + + std::cout << "#Reads with soft-clipping after clustering: " << clipClusters.size() << std::endl; + + std::vector finalClips; + finalClips.reserve(clipClusters.size()); + std::transform(clipClusters.begin(), clipClusters.end(), back_inserter(finalClips), + [](const std::vector& v){ return v[v.size()/2]; }); +*/ + + /* + pTimer = new Timer("Calling deletions"); + for (auto pClip: clips) { +// if (pClip->getConflictFlag()) continue; + try { + auto del = pClip->call(bamReader, faidx, insLength, opt::minOverlap, identityRate, opt::minMapQual); + deletions.push_back(del); + } catch (ErrorException& ex) { + // std::cout << ex.getMessage() << std::endl; + } + } + delete pTimer; + */ + + if (deletions.empty()) { + std::cout << "No deletion was found." 
<< std::endl; + return 0; + } + + pTimer = new Timer("Merging deletions"); + std::sort(deletions.begin(), deletions.end()); + deletions.erase(std::unique(deletions.begin(), deletions.end()), deletions.end()); + +// std::vector > delClusters; + +// cluster(deletions, delClusters, +// [](const Deletion& d1, const Deletion& d2){ return d1.overlaps(d2); }); + + std::vector finalDels; + merge(deletions, finalDels, + [](const Deletion& d1, const Deletion& d2){ return d1.overlaps(d2); }); +// finalDels.reserve(delClusters.size()); +// for (auto &clu: delClusters) { +// finalDels.push_back(clu[0]); + /* + if (clu.size() == 1) finalDels.push_back(clu[0]); + else { + Deletion d(clu[0].getReferenceName(), + clu[0].getStart1(), + clu[clu.size()-1].getEnd1(), + clu[0].getStart2(), + clu[clu.size()-1].getEnd2(), + clu[0].getLength(), + clu[0].getFromTag()); + finalDels.push_back(d); + } + */ +// } + delete pTimer; + + output(opt::outFile, finalDels); + + return 0; +} + +// +// Handle command line arguments +// +void parseOptions(int argc, char** argv) +{ + bool bInsertMean = false; + bool bInsertSd = false; + bool die = false; + for (char c; (c = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1;) + { + std::istringstream arg(optarg != NULL ? 
optarg : ""); + switch (c) + { + case 'n': arg >> opt::allowedNum; break; + case 'm': arg >> opt::minOverlap; break; + case 'q': arg >> opt::minMapQual; break; + case 'r': arg >> opt::refFile; break; + case 'o': arg >> opt::outFile; break; + case 'e': arg >> opt::errorRate; break; + case '?': die = true; break; + case 'v': opt::verbose++; break; + case 'i': arg >> opt::insertMean; bInsertMean = true; break; + case 's': arg >> opt::insertSd; bInsertSd = true; break; + case OPT_ENHANCED_MODE: opt::mode = 1; break; + case OPT_HELP: + std::cout << DFINDER_USAGE_MESSAGE; + exit(EXIT_SUCCESS); + case OPT_VERSION: + std::cout << DFINDER_VERSION_MESSAGE; + exit(EXIT_SUCCESS); + } + } + + if (argc - optind < 1) + { + std::cerr << PROGRAM_NAME ": missing arguments\n"; + die = true; + } + else if (argc - optind > 1) + { + std::cerr << PROGRAM_NAME ": too many arguments\n"; + die = true; + } + + if (bInsertMean & bInsertSd) { + opt::bLearnInsert = false; + } + + if (bInsertMean ^ bInsertSd) { + std::cerr << PROGRAM_NAME ": the mean and standard deviation of insert size must be specified together\n"; + die = true; + } + + if(opt::errorRate > 1.0f) + { + std::cerr << PROGRAM_NAME ": invalid error-rate parameter: " << opt::errorRate << "\n"; + die = true; + } + + if(opt::refFile.empty()) + { + std::cerr << PROGRAM_NAME ": the reference file must be specified\n"; + die = true; + } + + if (die) + { + std::cout << "\n" << DFINDER_USAGE_MESSAGE; + exit(EXIT_FAILURE); + } + + // Validate parameters + if(opt::errorRate <= 0) + opt::errorRate = 0.0f; + + // Parse the input filename + opt::bamFile = argv[optind++]; + + std::string out_prefix = stripFilename(opt::bamFile); + if(opt::outFile.empty()) + { + opt::outFile = out_prefix + ".bedpe"; + } + +} + +void output(const std::string &filename, const std::vector &dels) { + std::ofstream out(filename.c_str()); + size_t i = 1; + std::for_each(std::begin(dels), std::end(dels), [&i, &out](const Deletion &d) + {out << d << "\tDEL." << i << "." 
// ---- range.h ---------------------------------------------------------------
#ifndef RANGE_H
#define RANGE_H

#include <cstddef>
#include <vector>

// Integer interval [start, end].
// NOTE(review): length() counts both end points (end - start + 1) while
// overlaps() treats `end` as exclusive; confirm which convention is intended.
struct IRange {
    int start;
    int end;

    int length() const;

    // Orders by start, then by end.
    bool operator<(const IRange &other) const;
    bool overlaps(const IRange& other) const;
};

// One end point of a range, tagged with the index of the range it belongs to.
struct IRangeEndPoint {
    int position;
    std::size_t ownerId;   // index of the owning range in the input vector
    bool isStart;          // true for a range's start, false for its end

    // Orders by position only.
    bool operator<(const IRangeEndPoint &other) const;
};

// A cluster is a list of indices into the input vector of ranges.
typedef std::vector<std::size_t> IdCluster;

void clusterRanges(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters);
void clusterRanges2(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters);

#endif // RANGE_H

// ---- range.cpp -------------------------------------------------------------

// Moves every id waiting in `buffer` into one new cluster, marks those ids as
// used and appends the cluster to `clusters` unless it is empty.
static void flushBuffer(std::queue<std::size_t> &buffer,
                        std::set<std::size_t> &usedIds,
                        std::vector<IdCluster> &clusters)
{
    IdCluster clu;
    while (!buffer.empty()) {
        clu.push_back(buffer.front());
        usedIds.insert(buffer.front());
        buffer.pop();
    }
    if (!clu.empty()) clusters.push_back(clu);
}

// Sweeps over the sorted end points of `ranges`. Start points queue their
// range id; the first end point of a range that has not been emitted yet
// flushes all queued ids as one cluster. Clusters are appended to `clusters`.
void clusterRanges(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters)
{
    std::vector<IRangeEndPoint> endPoints;
    endPoints.reserve(ranges.size() * 2);
    for (std::size_t i = 0; i < ranges.size(); ++i) {
        endPoints.push_back({ranges[i].start, i, true});
        endPoints.push_back({ranges[i].end, i, false});
    }

    std::sort(endPoints.begin(), endPoints.end());
    std::set<std::size_t> usedIds;
    std::queue<std::size_t> buffer;

    for (const IRangeEndPoint &pt : endPoints) {
        if (pt.isStart) {
            buffer.push(pt.ownerId);
            continue;
        }
        // The owner was already emitted with an earlier cluster.
        if (usedIds.count(pt.ownerId)) continue;
        flushBuffer(buffer, usedIds, clusters);
    }
    // Emit whatever is still queued after the last end point.
    flushBuffer(buffer, usedIds, clusters);
}

// Appends the cluster {startIndex, ..., endIndex - 1} to `clusters`.
void append(std::size_t startIndex, std::size_t endIndex, std::vector<IdCluster> &clusters)
{
    IdCluster buffer(endIndex - startIndex);
    std::iota(std::begin(buffer), std::end(buffer), startIndex);
    clusters.push_back(buffer);
}

// Groups consecutive ranges into clusters: a new cluster starts whenever a
// range does not overlap its predecessor.
// NOTE(review): only neighbouring elements are compared, so this presumably
// expects `ranges` to be sorted; confirm with the callers.
void clusterRanges2(const std::vector<IRange> &ranges, std::vector<IdCluster> &clusters)
{
    // Without this guard the trailing append(0, 0, ...) added one empty
    // cluster for an empty input.
    if (ranges.empty()) return;

    std::size_t startIndex = 0;

    for (std::size_t i = 1; i < ranges.size(); ++i) {
        if (!ranges[i-1].overlaps(ranges[i])) {
            append(startIndex, i, clusters);
            startIndex = i;
        }
    }
    append(startIndex, ranges.size(), clusters);
}


int IRange::length() const
{
    return end - start + 1;
}

bool IRange::operator<(const IRange &other) const
{
    if (start != other.start) return start < other.start;
    return end < other.end;
}

// True when either range's start lies inside the other one, with `end`
// treated as exclusive.
bool IRange::overlaps(const IRange &other) const
{
    return (start >= other.start && start < other.end) ||
            (other.start >= start && other.start < end);
}

bool IRangeEndPoint::operator<(const IRangeEndPoint &other) const
{
    return position < other.position;
}