diff --git a/src/BAFpileup.cpp b/src/BAFpileup.cpp index 0b7d862..02aae29 100644 --- a/src/BAFpileup.cpp +++ b/src/BAFpileup.cpp @@ -19,72 +19,68 @@ A copy of the GNU General Public License is available at *************************************************************************/ -#include "BAFpileup.h" - -using namespace std; - -BAFpileup::BAFpileup() -{ - //ctor -} - -void BAFpileup::makepileup(GenomeCopyNumber & sampleCopyNumber, GenomeCopyNumber & controlCopyNumber, - std::string sample_MateFile, std::string control_Matefile, std::string outputDir, std::string makeminipileup, - std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation, +#include "BAFpileup.h" + +using namespace std; + +BAFpileup::BAFpileup() +{ + //ctor +} + +void BAFpileup::makepileup(GenomeCopyNumber & sampleCopyNumber, GenomeCopyNumber & controlCopyNumber, + std::string sample_MateFile, std::string control_Matefile, std::string outputDir, std::string makeminipileup, + std::string const& mateFileName ,std::string const& inputFormat, std::string const& matesOrientation, std::string pathToSamtools, std::string pathToSambamba, std::string SambambaThreads, std::string chrLenFileName, std::string controlName, std::string targetBed, std::string pathToBedtools, - std::string fastaFile, int minQualPerPos) -{ + std::string fastaFile, int minQualPerPos) +{ //create a .bed file with regions of interest to create a minipileup: targeted + flanks for WES or all chromosomes for WGS: - std::string bedFileWithRegionsOfInterest = outputDir + "_NewCaptureRegions" + ".bed"; - - if (targetBed != "") - { - int flanks = calculateFlankLength(mateFileName, inputFormat, matesOrientation, pathToSamtools, pathToSambamba, SambambaThreads); - calculateNewBoundaries(targetBed, flanks, bedFileWithRegionsOfInterest); - } - else - { - createBedFileWithChromosomeLengths(bedFileWithRegionsOfInterest,chrLenFileName); + std::string bedFileWithRegionsOfInterest = outputDir + "_NewCaptureRegions" + ".bed"; + + if (targetBed != "") + { + int flanks = calculateFlankLength(mateFileName, inputFormat, matesOrientation, pathToSamtools, pathToSambamba, SambambaThreads); + calculateNewBoundaries(targetBed, flanks, bedFileWithRegionsOfInterest); + } + else + { + createBedFileWithChromosomeLengths(bedFileWithRegionsOfInterest,chrLenFileName); } - pathToBedtools_=pathToBedtools; // /* - string intersected = intersectWithBedtools(makeminipileup, outputDir, bedFileWithRegionsOfInterest, chrLenFileName); + pathToBedtools_=pathToBedtools; // /* + string intersected = intersectWithBedtools(makeminipileup, outputDir, bedFileWithRegionsOfInterest, chrLenFileName); string sampleOutFileName = createPileUpFile( outputDir, pathToSamtools , pathToSambamba, SambambaThreads, sample_MateFile, intersected, fastaFile,minQualPerPos); - + //BAFtumor = computeBAF(sampleCopyNumber, _sample, outputDir, "_sample"); - if (controlName.compare("")!=0) { + if (controlName.compare("")!=0) { string controlOutFileName = createPileUpFile( controlName, pathToSamtools, pathToSambamba, SambambaThreads, control_Matefile, intersected, fastaFile,minQualPerPos); - } - //computeBAF(controlCopyNumber, _control, outputDir, "_control"); - remove(intersected.c_str()); // */ + } + //computeBAF(controlCopyNumber, _control, outputDir, "_control"); + remove(intersected.c_str()); // */ } - + float BAFpileup::calculateFlankLength(std::string const& mateFileName, std::string const& inputFormat_str, std::string const& matesOrientation_str, std::string pathToSamtools_, std::string pathToSambamba, std::string SambambaThreads) -{ +{ if (matesOrientation_str=="0") return 0; // do not add anything in case of single end data if (getInputFormat(inputFormat_str)!=SAM_INPUT_FORMAT) return 0; - std::ifstream fileMates (mateFileName.c_str()); - vector insertSizeVector; - string line; - if (!fileMates.is_open()) { - cerr << "Error: unable to open "+mateFileName+"\n" ; - exit(-1); + std::ifstream fileMates (mateFileName.c_str()); + vector insertSizeVector; + string line; + if (!fileMates.is_open()) { + cerr << "Error: unable to open "+mateFileName+"\n" ; + exit(-1); } - fileMates.close(); - - #ifdef PROFILE_TRACE - time_t t0 = time(NULL); - #endif - - // MateOrientation matesOrientation = getMateOrientation(matesOrientation_str); // will not consider read orientation to increase speed - char* line_buffer; - FILE *stream; + fileMates.close(); + + // MateOrientation matesOrientation = getMateOrientation(matesOrientation_str); // will not consider read orientation to increase speed + char* line_buffer; + FILE *stream; char buffer[MAX_BUFFER]; bool zgOrbam=0; - int numberOfReadsToCheck=3000000; + int numberOfReadsToCheck=3000000; int j = 0; - int fragmentLength=0; + int fragmentLength=0; if(mateFileName.substr(mateFileName.size()-3,3).compare("bam")==0 || mateFileName.substr(mateFileName.size()-3,3).compare(".gz")==0) { string command; if (mateFileName.substr(mateFileName.size()-3,3).compare("bam")==0) { @@ -94,96 +90,98 @@ float BAFpileup::calculateFlankLength(std::string const& mateFileName, std::stri command = pathToSamtools_ + " view "+mateFileName; } } - if (mateFileName.substr(mateFileName.size()-3,3).compare(".gz")==0) { - command = "gzip -c -d "+mateFileName; + if (mateFileName.substr(mateFileName.size()-3,3).compare(".gz")==0) { + command = "gzip -c -d "+mateFileName; } - stream = - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _popen(command.c_str(), "r"); - #else - popen(command.c_str(), "r"); + + stream = + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _popen(command.c_str(), "r"); + #else + popen(command.c_str(), "r"); #endif zgOrbam=1; - while (((line_buffer = getLine(buffer, MAX_BUFFER, stream, line)) != NULL) && j 8 && strs[6][0]=='=') { int frLen=atoi(strs[8]); if(frLen>0 && frLen< 10000) { fragmentLength+=frLen; j++; - } + } } } } else { fileMates.open(mateFileName.c_str()); while( getline( fileMates, line ) ) { - if (line[0] == '@') + if (line[0] == '@') continue; - vector strs; - strs = split(line, '\t'); + vector strs; + strs = split(line, '\t'); if (strs.size() > 8 && strs[6][0]=='=') { int frLen=atoi(strs[8].c_str()); if(frLen>0 && frLen< 10000) { fragmentLength+=frLen; j++; - } + } } } fileMates.close(); } - - - fragmentLength = fragmentLength/j; + + + fragmentLength = fragmentLength/j; float flanks = fragmentLength/2; if (zgOrbam) { - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _pclose(stream); - #else - pclose(stream); + + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _pclose(stream); + #else + pclose(stream); #endif - } + } + + cout << "..will increase flanking regions by "<< flanks << " bp"< strs = split(line, '\t'); // Print new capture regions + + if (file.is_open()) { + std::string line; + exons_Count = 0; + exons_Counttmp = 0; + while (std::getline(file,line)) { + vector strs = split(line, '\t'); // Print new capture regions myfile << strs[0] << "\t" << atoi(strs[1].c_str())-flanks << "\t" << atoi(strs[2].c_str())+flanks<< "\t" << strs[0]<<":" < chr_names; @@ -230,67 +228,65 @@ void BAFpileup::createBedFileWithChromosomeLengths(std::string bedFileWithRegion } file.close(); cout << "..File "< " + intersected; - - stream = - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _popen(command.c_str(), "w"); - #else - popen(command.c_str(), "w"); + + stream = + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _popen(command.c_str(), "w"); + #else + popen(command.c_str(), "w"); #endif - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _pclose(stream); - #else - pclose(stream); + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _pclose(stream); + #else + pclose(stream); #endif - - remove(bedFileWithRegionsOfInterest.c_str()); + + remove(bedFileWithRegionsOfInterest.c_str()); if (makeminipileup.substr(makeminipileup.size()-3,3).compare("bed")==0) { return intersected; } - //else: VCF input format; need to transform into BED + //else: VCF input format; need to transform into BED ifstream file (intersected.c_str()); - ofstream myfile; + ofstream myfile; std::string intersectedBed = outputDir + "_SNPinNewCaptureRegions.bed"; - myfile.open(intersectedBed.c_str()); - std:: string line; - int nb_snp = 0; - while (std::getline(file,line)) { + myfile.open(intersectedBed.c_str()); + std:: string line; + int nb_snp = 0; + while (std::getline(file,line)) { nb_snp++; std::vector strs = split(line, '\t'); - myfile << strs[0] << "\t"<< atoi(strs[1].c_str()) -1 << "\t" << atoi(strs[1].c_str()) << "\t" << strs[3] << "\t" << strs[4]<<"\n"; - } + myfile << strs[0] << "\t"<< atoi(strs[1].c_str()) -1 << "\t" << atoi(strs[1].c_str()) << "\t" << strs[3] << "\t" << strs[4]<<"\n"; + } myfile.close(); file.close(); - remove(intersected.c_str()); - return intersectedBed; -} - + remove(intersected.c_str()); + return intersectedBed; +} + std::string BAFpileup::createPileUpFile(std::string outputDir, std::string samtools_path, std::string pathToSambamba,std::string SambambaThreads , std::string control_MateFile, std::string intersected, std::string fastaFile, int minQualPerPos) -{ - string minipileup = outputDir + "_minipileup" +".pileup"; - FILE *stream; +{ + string minipileup = outputDir + "_minipileup" +".pileup"; + FILE *stream; string command; if (pathToSambamba != "") { @@ -300,151 +296,151 @@ std::string BAFpileup::createPileUpFile(std::string outputDir, std::string samto command = samtools_path + " mpileup -f "+fastaFile+" -d 8000 -Q "+int2string(minQualPerPos)+" -q 1 -l " + intersected + " " + control_MateFile + " > " + minipileup; //discard reads wit 0 mapping quality } - stream = - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _popen(command.c_str(), "w"); - #else - popen(command.c_str(), "w"); + stream = + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _popen(command.c_str(), "w"); + #else + popen(command.c_str(), "w"); #endif - #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) - _pclose(stream); - #else - pclose(stream); + #if defined(_WIN32) || (defined(__APPLE__) && defined(__MACH__)) + _pclose(stream); + #else + pclose(stream); #endif - cout << "..If you have got an error at this step and a mini-pileup file is empty, check that you are using samtools v1.1 or later and provide a corresponding path in your config file"< >BAFpileup::computeBAF(GenomeCopyNumber & sampleorcontrol, std::string minipileup, std::string outputDir, std::string filename) -{ - - int numberofObjects = 0; - - vector::iterator it; - for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) - { - numberofObjects++; - } - - string line; - ifstream file (minipileup.c_str()); - int nb_snp = 0; - while (std::getline(file,line)) - { - nb_snp++; - } - - file.clear(); - file.seekg(0); - - chr = vector < vector >(numberofObjects); - snp_pos_pileup = vector < vector >(numberofObjects); - A_nb = vector < vector >(numberofObjects); - B_nb = vector < vector >(numberofObjects); - AB_nb = vector < vector >(numberofObjects); - - - int k = 0; - for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) - { - file.clear(); - file.seekg(0); - int i = 0; - int l = 0; - while (std::getline(file,line) && (k < numberofObjects)) - { - i = 0; - string chrtmp; - while (line[i] != '\t') - { - chrtmp += line[i]; - i++; - } - i++; - if (chrtmp == "chr" + it->getChromosome()) - { - - chr[k].push_back(chrtmp); - string snp_pos_pileuptmp; - while (line[i] != '\t') - { - snp_pos_pileuptmp += line[i]; - i++; - } - snp_pos_pileup[k].push_back(snp_pos_pileuptmp); - i++; - - int j = 0; - while ((snp_pos_pileup[k][l] != snp_pos[j]) && (j < nb_snp)) - { - j++; - } - - while (line[i] != '\t') - { - i++; - } - i++; - string AB_nbtmp; - while (line[i] != '\t') - { - AB_nbtmp += line[i]; - i++; - } - AB_nb[k].push_back(AB_nbtmp); - i++; - char* alternate_base = const_cast(alt_base[j].c_str()); - char alternate_basetmp = *alternate_base; - char* reference_base = const_cast(ref_base[j].c_str()); - char reference_basetmp = *reference_base; - B_nb[k].push_back(0); - A_nb[k].push_back(0); - while (line[i] != '\t') - { - if ((char)line[i] == reference_basetmp || (char)line[i] == (reference_basetmp+32) || ((char)line[i] == (reference_basetmp-32))) - { - B_nb[k][l]++; - } - if ((char)line[i] == alternate_basetmp || (char)line[i] == (alternate_basetmp+32) || ((char)line[i] == (alternate_basetmp-32))) - { - A_nb[k][l]++; - } - i++; - } - i++; - l++; - } - } - k++; - } - - string Newfile = outputDir + "_BAF" + ".txt"; - ofstream myfile; - std::vector < std::vector > BAF = vector < vector > (numberofObjects); - - int j = 0; - for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) - { - for (int i = 0; i < chr[j].size(); i++) - { - BAF[j].push_back(((float)B_nb[j][i]) / ((float)A_nb[j][i] + (float)B_nb[j][i])); - } - j++; - } - - j = 0; - myfile.open(Newfile.c_str()); - for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) - { - for (int i = 0; i < chr[j].size(); i++) - { - myfile << chr[j][i] << "\t" << snp_pos_pileup[j][i] << "\t" << AB_nb[j][i] << "\t" << B_nb[j][i] << "\t" << A_nb[j][i] << "\t" << BAF[j][i]<<"\n"; - } - j++; - } - myfile.close(); - return BAF; -} -*/ + cout << "..If you have got an error at this step and a mini-pileup file is empty, check that you are using samtools v1.1 or later and provide a corresponding path in your config file"< >BAFpileup::computeBAF(GenomeCopyNumber & sampleorcontrol, std::string minipileup, std::string outputDir, std::string filename) +{ + + int numberofObjects = 0; + + vector::iterator it; + for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) + { + numberofObjects++; + } + + string line; + ifstream file (minipileup.c_str()); + int nb_snp = 0; + while (std::getline(file,line)) + { + nb_snp++; + } + + file.clear(); + file.seekg(0); + + chr = vector < vector >(numberofObjects); + snp_pos_pileup = vector < vector >(numberofObjects); + A_nb = vector < vector >(numberofObjects); + B_nb = vector < vector >(numberofObjects); + AB_nb = vector < vector >(numberofObjects); + + + int k = 0; + for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) + { + file.clear(); + file.seekg(0); + int i = 0; + int l = 0; + while (std::getline(file,line) && (k < numberofObjects)) + { + i = 0; + string chrtmp; + while (line[i] != '\t') + { + chrtmp += line[i]; + i++; + } + i++; + if (chrtmp == "chr" + it->getChromosome()) + { + + chr[k].push_back(chrtmp); + string snp_pos_pileuptmp; + while (line[i] != '\t') + { + snp_pos_pileuptmp += line[i]; + i++; + } + snp_pos_pileup[k].push_back(snp_pos_pileuptmp); + i++; + + int j = 0; + while ((snp_pos_pileup[k][l] != snp_pos[j]) && (j < nb_snp)) + { + j++; + } + + while (line[i] != '\t') + { + i++; + } + i++; + string AB_nbtmp; + while (line[i] != '\t') + { + AB_nbtmp += line[i]; + i++; + } + AB_nb[k].push_back(AB_nbtmp); + i++; + char* alternate_base = const_cast(alt_base[j].c_str()); + char alternate_basetmp = *alternate_base; + char* reference_base = const_cast(ref_base[j].c_str()); + char reference_basetmp = *reference_base; + B_nb[k].push_back(0); + A_nb[k].push_back(0); + while (line[i] != '\t') + { + if ((char)line[i] == reference_basetmp || (char)line[i] == (reference_basetmp+32) || ((char)line[i] == (reference_basetmp-32))) + { + B_nb[k][l]++; + } + if ((char)line[i] == alternate_basetmp || (char)line[i] == (alternate_basetmp+32) || ((char)line[i] == (alternate_basetmp-32))) + { + A_nb[k][l]++; + } + i++; + } + i++; + l++; + } + } + k++; + } + + string Newfile = outputDir + "_BAF" + ".txt"; + ofstream myfile; + std::vector < std::vector > BAF = vector < vector > (numberofObjects); + + int j = 0; + for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) + { + for (int i = 0; i < chr[j].size(); i++) + { + BAF[j].push_back(((float)B_nb[j][i]) / ((float)A_nb[j][i] + (float)B_nb[j][i])); + } + j++; + } + + j = 0; + myfile.open(Newfile.c_str()); + for ( it=sampleorcontrol.chrCopyNumber_.begin() ; it != sampleorcontrol.chrCopyNumber_.end(); it++ ) + { + for (int i = 0; i < chr[j].size(); i++) + { + myfile << chr[j][i] << "\t" << snp_pos_pileup[j][i] << "\t" << AB_nb[j][i] << "\t" << B_nb[j][i] << "\t" << A_nb[j][i] << "\t" << BAF[j][i]<<"\n"; + } + j++; + } + myfile.close(); + return BAF; +} +*/ diff --git a/src/BAFpileup.h b/src/BAFpileup.h index 6b7df64..afb6e1f 100644 --- a/src/BAFpileup.h +++ b/src/BAFpileup.h @@ -1,14 +1,14 @@ #pragma once -#ifndef BAFPILEUP_H -#define BAFPILEUP_H - -#include +#ifndef BAFPILEUP_H +#define BAFPILEUP_H + +#include #include #include "GenomeCopyNumber.h" - -class BAFpileup -{ - public: + +class BAFpileup +{ + public: BAFpileup(); void makepileup(GenomeCopyNumber & sampleCopyNumber, GenomeCopyNumber & controlCopyNumber, std::string sample_MateFile,std::string control_MateFile, @@ -35,7 +35,7 @@ class BAFpileup std::vector < std::vector > A_nb; std::vector < std::vector > B_nb; std::vector < std::vector > AB_nb; - + private: int length_; @@ -50,7 +50,7 @@ class BAFpileup std::vector length_with_flanksTmp; std::vector strand; std::vector ref_name; - std::string pathToBedtools_; -}; - -#endif // BAFPILEUP_H + std::string pathToBedtools_; +}; + +#endif // BAFPILEUP_H diff --git a/src/GenomeCopyNumber.cpp b/src/GenomeCopyNumber.cpp index db70e65..ad6d363 100644 --- a/src/GenomeCopyNumber.cpp +++ b/src/GenomeCopyNumber.cpp @@ -1062,7 +1062,7 @@ int GenomeCopyNumber::getNumberOfChromosomes() { return chrCopyNumber_.size(); } -long double GenomeCopyNumber::calculateRSS(int ploidy) +long double GenomeCopyNumber::calculateRSS(int ploidy) { string::size_type pos = 0; vector observedvalues; @@ -1104,7 +1104,7 @@ long double GenomeCopyNumber::calculateRSS(int ploidy) RSS = RSS + (long double)pow(diff,2); } } - return RSS; + return RSS; } diff --git a/src/GenomeCopyNumber.h b/src/GenomeCopyNumber.h index ccc3a37..f8f213b 100644 --- a/src/GenomeCopyNumber.h +++ b/src/GenomeCopyNumber.h @@ -123,7 +123,7 @@ class GenomeCopyNumber void setWESanalysis(bool WESgiven); void setmakingPileup(bool makingPileup_given); double Percentage_GenomeExplained(int &); - long double calculateRSS(int ploidy); + long double calculateRSS(int ploidy); private: diff --git a/src/RSSerror.cpp b/src/RSSerror.cpp index e675976..c893b3a 100644 --- a/src/RSSerror.cpp +++ b/src/RSSerror.cpp @@ -1,12 +1,12 @@ -#include "RSSerror.h" +#include "RSSerror.h" using namespace std; - -RSSerror::RSSerror() + +RSSerror::RSSerror() { -} +} -long double calculateRSS(GenomeCopyNumber & samplecopynumber, int ploidy) +long double calculateRSS(GenomeCopyNumber & samplecopynumber, int ploidy) { string::size_type pos = 0; vector observedvalues; @@ -48,5 +48,5 @@ long double calculateRSS(GenomeCopyNumber & samplecopynumber, int ploidy) RSS = RSS + (long double)pow(diff,2); } } - return RSS; + return RSS; } diff --git a/src/RSSerror.h b/src/RSSerror.h index 70b450d..407d743 100644 --- a/src/RSSerror.h +++ b/src/RSSerror.h @@ -1,7 +1,7 @@ #pragma once -#ifndef RSSERROR_H -#define RSSERROR_H - +#ifndef RSSERROR_H +#define RSSERROR_H + #include #include #include @@ -13,13 +13,13 @@ #include "GenomeCopyNumber.h" #include "ChrCopyNumber.h" - -class RSSerror -{ - public: - RSSerror(); + +class RSSerror +{ + public: + RSSerror(); }; -long double calculateRSS(GenomeCopyNumber & samplecopynumber, int ploidy); - -#endif // RSSERROR_H +long double calculateRSS(GenomeCopyNumber & samplecopynumber, int ploidy); + +#endif // RSSERROR_H diff --git a/src/SNPatChr.h b/src/SNPatChr.h index d346bef..8142df5 100644 --- a/src/SNPatChr.h +++ b/src/SNPatChr.h @@ -1,15 +1,15 @@ -#ifndef SNPATCHR_H -#define SNPATCHR_H +#ifndef SNPATCHR_H +#define SNPATCHR_H #include -#include +#include #include "SNPposition.h" - -class SNPatChr -{ - public: + +class SNPatChr +{ + public: SNPatChr(const std::string&); SNPatChr(); void setChromosome(const std::string& chromosome); @@ -25,11 +25,11 @@ class SNPatChr void setStatusAt(int index,float value) ; int getSize(); - virtual ~SNPatChr(); - protected: + virtual ~SNPatChr(); + protected: private: std::vector SNPpositionArray_; - std::string chromosome_; -}; - -#endif // SNPATCHR_H + std::string chromosome_; +}; + +#endif // SNPATCHR_H diff --git a/src/SNPinGenome.cpp b/src/SNPinGenome.cpp index 6afc706..1ca824c 100644 --- a/src/SNPinGenome.cpp +++ b/src/SNPinGenome.cpp @@ -269,7 +269,7 @@ long SNPinGenome::processPileUPLine(int & positionCount, char* line, string & ol processChrName(chr); int lindex = p_genomeCopyNumber->findIndex(chr); if (lindex != NA) { - if (valueToReturn = strccnt(strs[4], '^')) { + if ((valueToReturn = strccnt(strs[4], '^'))) { ChrCopyNumber& chrCopyNumber = p_genomeCopyNumber->getChrCopyNumberAt(lindex); int step = p_genomeCopyNumber->getStep(); for (int i=0; ifindIndex(chr); if (lindex != NA) { - if (valueToReturn = strccnt(strs[4], '^')) { + if ((valueToReturn = strccnt(strs[4], '^'))) { ChrCopyNumber& chrCopyNumber = p_genomeCopyNumber->getChrCopyNumberAt(lindex); int l = 0; bool leftIsInTheWindow = false; @@ -312,7 +312,7 @@ long SNPinGenome::processPileUPLine(int & positionCount, char* line, string & ol if (currentPosition==sNPpositionToProceed) { try { - float localBAF=addInfoFromAPileUp(atoi(strs[3]),minimalTotalLetterCountPerPosition,(*SNP_atChr_)[index].getNucleotideAt(positionCount), + addInfoFromAPileUp(atoi(strs[3]),minimalTotalLetterCountPerPosition,(*SNP_atChr_)[index].getNucleotideAt(positionCount), index,positionCount,sNPpositionToProceed,strs[4], minimalQualityPerPosition,strs[5]); // if (localBAF>=0) { // heterozygousBAFs.push_back(localBAF); @@ -333,8 +333,8 @@ long SNPinGenome::processPileUPLine(int & positionCount, char* line, string & ol sNPpositionToProceed=NA; } if (currentPosition==sNPpositionToProceed) { - try { - float localBAF=addInfoFromAPileUp(atoi(strs[3]),minimalTotalLetterCountPerPosition,(*SNP_atChr_)[index].getNucleotideAt(positionCount), + try { + addInfoFromAPileUp(atoi(strs[3]),minimalTotalLetterCountPerPosition,(*SNP_atChr_)[index].getNucleotideAt(positionCount), index,positionCount,sNPpositionToProceed,strs[4], minimalQualityPerPosition,strs[5]); // if (localBAF>=0) { // heterozygousBAFs.push_back(localBAF); diff --git a/src/SNPposition.cpp b/src/SNPposition.cpp index 9645e15..96246dd 100644 --- a/src/SNPposition.cpp +++ b/src/SNPposition.cpp @@ -28,7 +28,7 @@ SNPposition::SNPposition(int position, char* alt) //for a VCF line nucleotide_ = alt[0]; } else { char* strs[4]; - unsigned strs_cnt = split(alt, ',', strs); + split(alt, ',', strs); nucleotide_ = strs[0][0]; } freq_ = 0; // EV: must be initialized @@ -45,7 +45,7 @@ SNPposition::SNPposition(int position, char* letters, const char* strand, const if (strlen(letters) == 1) { //should not get here nucleotide_ = letters[0]; } else { - unsigned strs_cnt = split(letters, '/', strs); + split(letters, '/', strs); char c_ref; if (reverse) { c_ref = complement(ref[0]); @@ -66,11 +66,11 @@ SNPposition::SNPposition(int position, char* letters, const char* strand, const bin_=NA; //before initialization } - -SNPposition::~SNPposition() -{ - //dtor -} + +SNPposition::~SNPposition() +{ + //dtor +} int SNPposition::getPosition() { return position_; @@ -78,7 +78,7 @@ int SNPposition::getPosition() { char SNPposition::getNucleotide() { return nucleotide_; -} +} void SNPposition::setFrequency(float freq) { freq_ = freq; @@ -87,7 +87,7 @@ void SNPposition::setFrequency(float freq) { void SNPposition::setStatus(float status) { status_ = status; } - + float SNPposition::getValue() { return freq_ ; } diff --git a/src/SNPposition.h b/src/SNPposition.h index 2e959fb..631a827 100644 --- a/src/SNPposition.h +++ b/src/SNPposition.h @@ -1,14 +1,14 @@ -#ifndef SNPPOSITION_H -#define SNPPOSITION_H - +#ifndef SNPPOSITION_H +#define SNPPOSITION_H + #include #include #include "myFunc.h" - -class SNPposition -{ - public: + +class SNPposition +{ + public: SNPposition(int position, char* letters, const char* strand, const char* ref); //from TXT SNPposition(int position, char* alt); //for VCF virtual ~SNPposition(); @@ -19,14 +19,14 @@ class SNPposition void setFrequency(float freq); void setStatus(float status); void setBin(int i); - int getBin(); - protected: + int getBin(); + protected: private: unsigned int position_; char nucleotide_; float freq_; float status_; //0 - AA/BB, 0.5 - AB int bin_; //bin in the copy number array -}; - -#endif // SNPPOSITION_H +}; + +#endif // SNPPOSITION_H diff --git a/src/SeekSubclones.cpp b/src/SeekSubclones.cpp index a44e9b6..af3af6b 100644 --- a/src/SeekSubclones.cpp +++ b/src/SeekSubclones.cpp @@ -19,11 +19,11 @@ A copy of the GNU General Public License is available at *************************************************************************/ -#include "SeekSubclones.h" +#include "SeekSubclones.h" using namespace std; -SeekSubclones::SeekSubclones(void) +SeekSubclones::SeekSubclones(void) { } @@ -31,12 +31,12 @@ SeekSubclones::~SeekSubclones(void) { } - + SeekSubclones::SeekSubclones(GenomeCopyNumber & samplecopynumber, int ploidy, std::string myName, float minimal_pop) { ploidy_ = ploidy; - minimal_pop_ = minimal_pop/100; - getSegmentsInfo(samplecopynumber, myName); -} + minimal_pop_ = minimal_pop/100; + getSegmentsInfo(samplecopynumber, myName); +} void SeekSubclones::getSegmentsInfo(GenomeCopyNumber & samplecopynumber, std::string myName) @@ -191,7 +191,7 @@ bool SeekSubclones::PercentageTest(std::vector & data, float& threshold) subclone = true; } return subclone; -} +} void SeekSubclones::EstimateSubclonalPopulation(vector data, float threshold, int ploidy_) diff --git a/src/SeekSubclones.h b/src/SeekSubclones.h index f60024e..f23e86e 100644 --- a/src/SeekSubclones.h +++ b/src/SeekSubclones.h @@ -1,6 +1,6 @@ #pragma once -#ifndef SEEKSUBCLONES_H -#define SEEKSUBCLONES_H +#ifndef SEEKSUBCLONES_H +#define SEEKSUBCLONES_H #include #include @@ -13,23 +13,23 @@ #include #include #include - -class SeekSubclones -{ - public: + +class SeekSubclones +{ + public: SeekSubclones(GenomeCopyNumber & samplecopynumber, int ploidy, std::string myName, float minimal_pop); SeekSubclones(); ~SeekSubclones(void); void getSegmentsInfo(GenomeCopyNumber & samplecopynumber, std::string myName); bool SignTest(std::vector & data, float& threshold, int bonfer_correction); - void EstimateSubclonalPopulation(std::vector data, float threshold, int ploidy_); - bool PercentageTest(std::vector & data, float& threshold); + void EstimateSubclonalPopulation(std::vector data, float threshold, int ploidy_); + bool PercentageTest(std::vector & data, float& threshold); protected: int ploidy_; float minimal_pop_; std::vector copynumber_; - std::vector population_; - private: + std::vector population_; + private: }; - -#endif // SEEKSUBCLONES_H + +#endif // SEEKSUBCLONES_H diff --git a/src/ThreadPool.h b/src/ThreadPool.h index fa910ce..ea0f01d 100644 --- a/src/ThreadPool.h +++ b/src/ThreadPool.h @@ -19,12 +19,12 @@ #include #include //starting from v6.6 for compatibility with Ubuntu -#ifdef _WIN32 +#ifdef _WIN32 //x32 Windows definitions -#include -#else +#include +#else //other platforms -#include +#include #endif class ThreadPool; @@ -101,10 +101,10 @@ class Thread { bool waitFinished() { - #ifdef _WIN32 + #ifdef _WIN32 //x32 Windows definitions Sleep(WAIT_USECONDS); -#else +#else //other platforms usleep(WAIT_USECONDS); #endif diff --git a/src/binomialdistr.h b/src/binomialdistr.h index 55a0c00..aa1932b 100644 --- a/src/binomialdistr.h +++ b/src/binomialdistr.h @@ -1,152 +1,152 @@ #ifndef HEADER_362C1E424D7DDEE3 #define HEADER_362C1E424D7DDEE3 -/************************************************************************* -Cephes Math Library Release 2.8: June, 2000 -Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier - -Contributors: - * Sergey Bochkanov (ALGLIB project). Translation from C to - pseudocode. - -See subroutines comments for additional copyrights. - ->>> SOURCE LICENSE >>> -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation (www.fsf.org); either version 2 of the -License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -A copy of the GNU General Public License is available at -http://www.fsf.org/licensing/licenses - ->>> END OF LICENSE >>> -*************************************************************************/ - -#ifndef _binomialdistr_h -#define _binomialdistr_h - -#include "ap.h" -#include "ialglib.h" - -#include "gammafunc.h" -#include "normaldistr.h" +/************************************************************************* +Cephes Math Library Release 2.8: June, 2000 +Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier + +Contributors: + * Sergey Bochkanov (ALGLIB project). Translation from C to + pseudocode. + +See subroutines comments for additional copyrights. + +>>> SOURCE LICENSE >>> +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation (www.fsf.org); either version 2 of the +License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A copy of the GNU General Public License is available at +http://www.fsf.org/licensing/licenses + +>>> END OF LICENSE >>> +*************************************************************************/ + +#ifndef _binomialdistr_h +#define _binomialdistr_h + +#include "ap.h" +#include "ialglib.h" + +#include "gammafunc.h" +#include "normaldistr.h" #include "ibetaf.h" #include -#include +#include #include "myFunc.h" - - -//#include "nearunityunit.h" - - -/************************************************************************* -Binomial distribution - -Returns the sum of the terms 0 through k of the Binomial -probability density: - - k - -- ( n ) j n-j - > ( ) p (1-p) - -- ( j ) - j=0 - -The terms are not summed directly; instead the incomplete -beta integral is employed, according to the formula - -y = bdtr( k, n, p ) = incbet( n-k, k+1, 1-p ). - -The arguments must be positive, with p ranging from 0 to 1. - -ACCURACY: - -Tested at random points (a,b,p), with p between 0 and 1. - - a,b Relative error: -arithmetic domain # trials peak rms - For p between 0.001 and 1: - IEEE 0,100 100000 4.3e-15 2.6e-16 - -Cephes Math Library Release 2.8: June, 2000 -Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier -*************************************************************************/ -double binomialdistribution(int k, int n, double p); - - -/************************************************************************* -Complemented binomial distribution - -Returns the sum of the terms k+1 through n of the Binomial -probability density: - - n - -- ( n ) j n-j - > ( ) p (1-p) - -- ( j ) - j=k+1 - -The terms are not summed directly; instead the incomplete -beta integral is employed, according to the formula - -y = bdtrc( k, n, p ) = incbet( k+1, n-k, p ). - -The arguments must be positive, with p ranging from 0 to 1. - -ACCURACY: - -Tested at random points (a,b,p). - - a,b Relative error: -arithmetic domain # trials peak rms - For p between 0.001 and 1: - IEEE 0,100 100000 6.7e-15 8.2e-16 - For p between 0 and .001: - IEEE 0,100 100000 1.5e-13 2.7e-15 - -Cephes Math Library Release 2.8: June, 2000 -Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier -*************************************************************************/ -double binomialcdistribution(int k, int n, double p); - - -/************************************************************************* -Inverse binomial distribution - -Finds the event probability p such that the sum of the -terms 0 through k of the Binomial probability density -is equal to the given cumulative probability y. - -This is accomplished using the inverse beta integral -function and the relation - -1 - p = incbi( n-k, k+1, y ). - -ACCURACY: - -Tested at random points (a,b,p). - - a,b Relative error: -arithmetic domain # trials peak rms - For p between 0.001 and 1: - IEEE 0,100 100000 2.3e-14 6.4e-16 - IEEE 0,10000 100000 6.6e-12 1.2e-13 - For p between 10^-6 and 0.001: - IEEE 0,100 100000 2.0e-12 1.3e-14 - IEEE 0,10000 100000 1.5e-12 3.2e-14 - -Cephes Math Library Release 2.8: June, 2000 -Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier -*************************************************************************/ -double invbinomialdistribution(int k, int n, double y); - - -#endif - + + +//#include "nearunityunit.h" + + +/************************************************************************* +Binomial distribution + +Returns the sum of the terms 0 through k of the Binomial +probability density: + + k + -- ( n ) j n-j + > ( ) p (1-p) + -- ( j ) + j=0 + +The terms are not summed directly; instead the incomplete +beta integral is employed, according to the formula + +y = bdtr( k, n, p ) = incbet( n-k, k+1, 1-p ). + +The arguments must be positive, with p ranging from 0 to 1. + +ACCURACY: + +Tested at random points (a,b,p), with p between 0 and 1. + + a,b Relative error: +arithmetic domain # trials peak rms + For p between 0.001 and 1: + IEEE 0,100 100000 4.3e-15 2.6e-16 + +Cephes Math Library Release 2.8: June, 2000 +Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier +*************************************************************************/ +double binomialdistribution(int k, int n, double p); + + +/************************************************************************* +Complemented binomial distribution + +Returns the sum of the terms k+1 through n of the Binomial +probability density: + + n + -- ( n ) j n-j + > ( ) p (1-p) + -- ( j ) + j=k+1 + +The terms are not summed directly; instead the incomplete +beta integral is employed, according to the formula + +y = bdtrc( k, n, p ) = incbet( k+1, n-k, p ). + +The arguments must be positive, with p ranging from 0 to 1. + +ACCURACY: + +Tested at random points (a,b,p). + + a,b Relative error: +arithmetic domain # trials peak rms + For p between 0.001 and 1: + IEEE 0,100 100000 6.7e-15 8.2e-16 + For p between 0 and .001: + IEEE 0,100 100000 1.5e-13 2.7e-15 + +Cephes Math Library Release 2.8: June, 2000 +Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier +*************************************************************************/ +double binomialcdistribution(int k, int n, double p); + + +/************************************************************************* +Inverse binomial distribution + +Finds the event probability p such that the sum of the +terms 0 through k of the Binomial probability density +is equal to the given cumulative probability y. + +This is accomplished using the inverse beta integral +function and the relation + +1 - p = incbi( n-k, k+1, y ). + +ACCURACY: + +Tested at random points (a,b,p). + + a,b Relative error: +arithmetic domain # trials peak rms + For p between 0.001 and 1: + IEEE 0,100 100000 2.3e-14 6.4e-16 + IEEE 0,10000 100000 6.6e-12 1.2e-13 + For p between 10^-6 and 0.001: + IEEE 0,100 100000 2.0e-12 1.3e-14 + IEEE 0,10000 100000 1.5e-12 3.2e-14 + +Cephes Math Library Release 2.8: June, 2000 +Copyright 1984, 1987, 1995, 2000 by Stephen L. Moshier +*************************************************************************/ +double invbinomialdistribution(int k, int n, double y); + + +#endif + #endif // header guard diff --git a/src/freec b/src/freec old mode 100644 new mode 100755 index 157ef02..538bfb7 Binary files a/src/freec and b/src/freec differ diff --git a/src/ialglib.cpp b/src/ialglib.cpp index 206e862..999578c 100644 --- a/src/ialglib.cpp +++ b/src/ialglib.cpp @@ -9,8 +9,6 @@ optimized ALGLIB subroutines. static const int alglib_simd_alignment = 16; static const int alglib_r_block = 32; static const int alglib_c_block = 24; -static const int alglib_half_r_block = alglib_r_block/2; -static const int alglib_half_c_block = alglib_c_block/2; static const int alglib_twice_r_block = alglib_r_block*2; static const int alglib_twice_c_block = alglib_c_block*2; //#define ABLAS_PREFETCH(x) _mm_prefetch((const char*)(x),_MM_HINT_T0) @@ -402,7 +400,7 @@ This subroutine copies unaligned complex vector 1. strideb is stride measured in complex numbers, not doubles 2. conj may be "N" (no conj.) or "C" (conj.) ********************************************************************/ -void ialglib::vcopy_complex(int n, const ap::complex *a, int stridea, double *b, int strideb, char *conj) +void ialglib::vcopy_complex(int n, const ap::complex *a, int stridea, double *b, int strideb, const char *conj) { int i; @@ -434,7 +432,7 @@ This subroutine copies unaligned complex vector (passed as double*) 1. strideb is stride measured in complex numbers, not doubles 2. conj may be "N" (no conj.) or "C" (conj.) ********************************************************************/ -void ialglib::vcopy_complex(int n, const double *a, int stridea, double *b, int strideb, char *conj) +void ialglib::vcopy_complex(int n, const double *a, int stridea, double *b, int strideb, const char *conj) { int i; diff --git a/src/ialglib.h b/src/ialglib.h index 86d31bb..6317bd2 100644 --- a/src/ialglib.h +++ b/src/ialglib.h @@ -19,8 +19,8 @@ void mv_complex_generic(int m, int n, const double *a, const double *x, ap::comp void vzero(int n, double *p, int stride); void vzero_complex(int n, ap::complex *p, int stride); void vcopy(int n, const double *a, int stridea, double *b, int strideb); -void vcopy_complex(int n, const ap::complex *a, int stridea, double *b, int strideb, char *conj); -void vcopy_complex(int n, const double *a, int stridea, double *b, int strideb, char *conj); +void vcopy_complex(int n, const ap::complex *a, int stridea, double *b, int strideb, const char *conj); +void vcopy_complex(int n, const double *a, int stridea, double *b, int strideb, const char *conj); void mcopyblock(int m, int n, const double *a, int op, int stride, double *b); void mcopyunblock(int m, int n, const double *a, int op, double *b, int stride); void mcopyblock_complex(int m, int n, const ap::complex *a, int op, int stride, double *b); diff --git a/src/myFunc.cpp b/src/myFunc.cpp index 8244748..53d8d0f 100644 --- a/src/myFunc.cpp +++ b/src/myFunc.cpp @@ -51,7 +51,7 @@ unsigned int split(char* str_ori, char delim, char* elems[]) unsigned int jj = 0; unsigned int elem_cnt = 0; char c; - for (; c = *str++; ++jj) { + for (; (c = *str++); ++jj) { if (c == delim) { str_ori[jj] = 0; elems[elem_cnt++] = &str_ori[last_jj]; @@ -367,7 +367,7 @@ long getReadNumberFromPileup(std::string const& fileName) { strs = split(line, '\t'); if (strs.size() > 4) { int toadd; - if (toadd=strccnt(strs[4].c_str(), '^')) { + if ((toadd = strccnt(strs[4].c_str(), '^'))) { count += toadd; } } @@ -390,7 +390,7 @@ long getReadNumberFromPileup(std::string const& fileName) { strs = split(line, '\t'); if (strs.size() > 4) { int toadd; - if (toadd=strccnt(strs[4].c_str(), '^')) { + if ((toadd = strccnt(strs[4].c_str(), '^'))) { count += toadd; } }