diff --git a/.pydevproject b/.pydevproject
new file mode 100644
index 0000000..40e9f40
--- /dev/null
+++ b/.pydevproject
@@ -0,0 +1,5 @@
+
+
+Default
+python 2.7
+
diff --git a/PL_to_GL_reorder.py b/PL_to_GL_reorder.py
new file mode 100755
index 0000000..6e53133
--- /dev/null
+++ b/PL_to_GL_reorder.py
@@ -0,0 +1,77 @@
+#!/bin/env python3
+
+import argparse
+from Bio.bgzf import BgzfWriter
+import gzip
+import sys
+
+def main():
+
+ args = parse_args()
+ with gzip.open(args.out, 'wt') as fd_out:
+ for i_vcf, vcf in enumerate(args.vcf):
+ with gzip.open(vcf, 'rt') as fd_vcf:
+ ## loop metadata lines and header line
+ for line in fd_vcf:
+ ## end of metadata lines
+ if line[0:2] != '##':
+ ## print header line
+ print(line, end='', file=fd_out)
+ ## break loop at the header line
+ break
+ if i_vcf == 0:
+ ## print metadata line
+ print(line, end='', file=fd_out)
+ ## loop records
+ for line in fd_vcf:
+ l = line.rstrip().split('\t')
+ field = l[8].split(':')
+ ## index PL
+ i_PL = l[8].split(':').index('PL')
+ field.append('GL')
+ GT = field.pop(0)
+ field.sort()
+ field.insert(0, GT)
+ l[8] = ':'.join(field)
+ ## index GL
+ i_GL = l[8].split(':').index('GL')
+ s = '\t'.join(l[:9])
+ print(s, sep='\t', file=fd_out, end='')
+ for i_GT in range(9, len(l)):
+ lGT = l[i_GT]
+ info = l[i_GT].split(':')
+ if lGT == './.:0,0':
+ toSplit = './.:0,0:0'
+ info = toSplit.split(':')
+ if info[0] == './.':
+ s_GL = '.,.,.'
+ else:
+ s_PL = l[i_GT].split(':')[i_PL]
+ if s_PL == '.':
+ s_GL = '.,.,.'
+ else:
+ s_GL = ','.join(
+ str(-float(PL)/10) for PL in s_PL.split(','))
+ if s_GL==".":
+ print(line)
+ exit()
+ info.insert(i_GL, s_GL)
+ l[i_GT] = info
+ print('\t' +
+ ':'.join(l[i_GT]), sep='\t', end='',
+ file=fd_out)
+ print(end='\n', file=fd_out)
+
+ return
+
+
+def parse_args():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--vcf', nargs = '+')
+ parser.add_argument('--out', required=True)
+ args = parser.parse_args()
+
+ return args
+
+if __name__ == '__main__':
+ main()
diff --git a/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4 b/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4
new file mode 100755
index 0000000..6ec9ef0
Binary files /dev/null and b/prepareGenFromBeagle4_modified20160601/bin/prepareGenFromBeagle4 differ
diff --git a/prepareGenFromBeagle4_modified20160601/makefile b/prepareGenFromBeagle4_modified20160601/makefile
new file mode 100644
index 0000000..5cf3579
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/makefile
@@ -0,0 +1,90 @@
+#compiler
+CXX=g++
+
+#compiler flags
+COPTI= -O3
+CDEBG= -g
+CWARN= -Wall -Wextra -Wno-sign-compare
+CBAMH= -D_LIBBAM
+CSHV1=
+
+#linker flags
+LOPTI= -O3
+LDEBG= -g
+LSTDD= -lm -lpthread -lboost_iostreams -lboost_program_options
+LSTDS= -Wl,-Bstatic -lboost_iostreams -lz -lbz2 -Wl,-Bdynamic -lm -lpthread
+LCL3S= -Wl,-Bstatic -lz -lbz2 -Wl,-Bdynamic -lm -lpthread
+LBAMD= -lbam -lz
+
+#executable file
+EFILE= bin/prepareGenFromBeagle4
+
+#header files
+HFILE= $(shell find src -name *.h)
+
+#source files
+CFILE= $(shell find src -name *.cpp)
+
+#source path
+VPATH= $(shell for file in `find src -name *.cpp`; do echo $$(dirname $$file); done)
+
+#include path
+ISTDP= -Isrc
+IBAMP= -Ilib
+#ICL3P= -I/apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/include
+#ICL3P= -I$EBROOTBOOST/include
+ICL3P= -I/apps/software/Boost/1.60.0-foss-2015b-Python-2.7.9/include/
+
+#library path
+LSTDP= -Llib
+
+#object files
+OFILE= $(shell for file in `find src -name *.cpp`; do echo obj/$$(basename $$file .cpp).o; done)
+OBOST=
+
+#default
+all: cluster
+
+#dynamic release
+dynamic: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+dynamic: LFLAG=$(LOPTI) $(LSTDD)
+dynamic: IFLAG=$(ISTDP)
+dynamic: $(EFILE)
+
+#debug release
+debug: CFLAG=$(CDEBG) $(CWARN) $(CSHV1)
+debug: LFLAG=$(LDEBG) $(LSTDD)
+debug: IFLAG=$(ISTDP)
+debug: $(EFILE)
+
+#static release
+static: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+static: LFLAG=$(LOPTI) $(LSTDS)
+static: IFLAG=$(ISTDP)
+static: $(EFILE)
+
+#cluster release
+cluster: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+cluster: LFLAG=$(LOPTI) $(LCL3S)
+cluster: IFLAG=$(ISTDP) $(ICL3P)
+#cluster: OBOST=/apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/lib/libboost_iostreams.a /apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/lib/libboost_program_options.a
+OBOST=-lz /apps/software/Boost/1.60.0-foss-2015b-Python-2.7.9/lib/libboost_iostreams.a
+cluster: $(EFILE)
+
+$(EFILE): $(OFILE)
+ $(CXX) $^ $(OBOST) -o $@ $(LFLAG)
+
+obj/%.o: %.cpp $(HFILE)
+ $(CXX) -o $@ -c $< $(CFLAG) $(IFLAG)
+
+clean:
+ rm -f obj/*.o $(EFILE)
+
+test:
+ cp $(EFILE) ~/$(EFILE).v10
+
+oxford:
+ cp $(EFILE) ~/bin/.
+
+install:
+ cp $(EFILE) ./tmp/
diff --git a/prepareGenFromBeagle4_modified20160601/makefile.bak b/prepareGenFromBeagle4_modified20160601/makefile.bak
new file mode 100644
index 0000000..3182d38
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/makefile.bak
@@ -0,0 +1,87 @@
+#compiler
+CXX=g++
+
+#compiler flags
+COPTI= -O3
+CDEBG= -g
+CWARN= -Wall -Wextra -Wno-sign-compare
+CBAMH= -D_LIBBAM
+CSHV1=
+
+#linker flags
+LOPTI= -O3
+LDEBG= -g
+LSTDD= -lm -lpthread -lboost_iostreams -lboost_program_options
+LSTDS= -Wl,-Bstatic -lboost_iostreams -lz -lbz2 -Wl,-Bdynamic -lm -lpthread
+LCL3S= -Wl,-Bstatic -lz -lbz2 -Wl,-Bdynamic -lm -lpthread
+LBAMD= -lbam -lz
+
+#executable file
+EFILE= bin/prepareGenFromBeagle4
+
+#header files
+HFILE= $(shell find src -name *.h)
+
+#source files
+CFILE= $(shell find src -name *.cpp)
+
+#source path
+VPATH= $(shell for file in `find src -name *.cpp`; do echo $$(dirname $$file); done)
+
+#include path
+ISTDP= -Isrc
+IBAMP= -Ilib
+ICL3P= -I/users/delaneau/BOOST/include
+
+#library path
+LSTDP= -Llib
+
+#object files
+OFILE= $(shell for file in `find src -name *.cpp`; do echo obj/$$(basename $$file .cpp).o; done)
+OBOST=
+
+#default
+all: dynamic
+
+#dynamic release
+dynamic: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+dynamic: LFLAG=$(LOPTI) $(LSTDD)
+dynamic: IFLAG=$(ISTDP)
+dynamic: $(EFILE)
+
+#debug release
+debug: CFLAG=$(CDEBG) $(CWARN) $(CSHV1)
+debug: LFLAG=$(LDEBG) $(LSTDD)
+debug: IFLAG=$(ISTDP)
+debug: $(EFILE)
+
+#static release
+static: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+static: LFLAG=$(LOPTI) $(LSTDS)
+static: IFLAG=$(ISTDP)
+static: $(EFILE)
+
+#cluster release
+cluster: CFLAG=$(COPTI) $(CWARN) $(CSHV1)
+cluster: LFLAG=$(LOPTI) $(LCL3S)
+cluster: IFLAG=$(ISTDP) $(ICL3P)
+cluster: OBOST=~/BOOST/lib/libboost_iostreams.a ~/BOOST/lib/libboost_program_options.a
+cluster: $(EFILE)
+
+$(EFILE): $(OFILE)
+ $(CXX) $^ $(OBOST) -o $@ $(LFLAG)
+
+obj/%.o: %.cpp $(HFILE)
+ $(CXX) -o $@ -c $< $(CFLAG) $(IFLAG)
+
+clean:
+ rm -f obj/*.o $(EFILE)
+
+test:
+ cp $(EFILE) ~/$(EFILE).v10
+
+oxford:
+ cp $(EFILE) ~/bin/.
+
+install:
+ cp $(EFILE) /usr/local/bin/.
diff --git a/prepareGenFromBeagle4_modified20160601/obj/chunk.o b/prepareGenFromBeagle4_modified20160601/obj/chunk.o
new file mode 100644
index 0000000..3ab7ecc
Binary files /dev/null and b/prepareGenFromBeagle4_modified20160601/obj/chunk.o differ
diff --git a/prepareGenFromBeagle4_modified20160601/obj/data.o b/prepareGenFromBeagle4_modified20160601/obj/data.o
new file mode 100644
index 0000000..b7f5355
Binary files /dev/null and b/prepareGenFromBeagle4_modified20160601/obj/data.o differ
diff --git a/prepareGenFromBeagle4_modified20160601/obj/prepareGenFromBeagle4.o b/prepareGenFromBeagle4_modified20160601/obj/prepareGenFromBeagle4.o
new file mode 100644
index 0000000..73749df
Binary files /dev/null and b/prepareGenFromBeagle4_modified20160601/obj/prepareGenFromBeagle4.o differ
diff --git a/prepareGenFromBeagle4_modified20160601/obj/utils.o b/prepareGenFromBeagle4_modified20160601/obj/utils.o
new file mode 100644
index 0000000..9d7c0f7
Binary files /dev/null and b/prepareGenFromBeagle4_modified20160601/obj/utils.o differ
diff --git a/prepareGenFromBeagle4_modified20160601/old/chunk.cpp b/prepareGenFromBeagle4_modified20160601/old/chunk.cpp
new file mode 100644
index 0000000..4efdb35
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/old/chunk.cpp
@@ -0,0 +1,76 @@
+#include "classes.h"
+
+
+int hamming (vector < bool > & h1, vector < bool > & h2) {
+ assert(h1.size() == h2.size());
+ int c = 0;
+ for(int l = 0 ; l < h1.size(); l++) if (h1[l] != h2[l]) c++;
+ return c;
+}
+
+chunk::chunk(char * id) {
+ this->id = string(id);
+}
+
+chunk::~chunk() {
+ mappingS.clear();
+ H.clear();
+ P.clear();
+ switches.clear();
+}
+
+bool chunk::operator < (const chunk & c) const {
+ return (mappingS[0] < c.mappingS[0]);
+}
+
+void chunk::assemble(chunk & pc) {
+ //mapping starting point of the overlap
+ int b1 = -1;
+ int e1 = pc.mappingS.size() - 1;
+ for (int l = 0 ; l < pc.mappingS.size() && b1 < 0 ; l ++ ) if (pc.mappingS[l] == mappingS[0]) b1 = l;
+ if (b1 < 0) {cout << "Chunk [" << id << "] does not overlap chunk [" << pc.id << "], code 1" << endl; exit(1);}
+
+ //mapping starting point of the overlap
+ int b2 = 0;
+ int e2 = -1;
+ for (int l = 0 ; l < mappingS.size() && e2 < 0 ; l ++ ) if (mappingS[l] == pc.mappingS.back()) e2 = l;
+ if (e2 < 0) {cout << "Chunk [" << id << "] does not overlap chunk [" << pc.id << "], code 2" << endl; exit(1);}
+
+ //overlap size
+ int osize = e1 - b1 + 1;
+ int osize2 = e2 - b2 + 1;
+ if (osize != osize2) {cout << "Overlaps are differents between [" << id << "] does not overlap chunk [" << pc.id << "] o1=" << osize << " o2=" << osize2 <chr = chr;
+ this->id = id;
+ this->pos = pos;
+ this->a0 = a0;
+ this->a1 = a1;
+ this->idx = idx;
+ done = false;
+ }
+};
+
+class snp_set {
+public:
+ vector < snp * > vec_snp;
+ multimap < int, snp *> map_snp;
+
+ //C/D
+ snp_set() {};
+ ~snp_set() {};
+
+ //M
+ int size() {
+ return vec_snp.size();
+ }
+
+ void push(snp * s) {
+ vec_snp.push_back(s);
+ map_snp.insert ( std::pair< int, snp * >( s->pos , s));
+ }
+
+ snp * get(int p, string & r0, string & r1) {
+ pair < multimap < int, snp *>::iterator, multimap < int, snp *>::iterator > seqM = map_snp.equal_range(p);
+ for (multimap < int, snp *>::iterator itM = seqM.first; itM != seqM.second; ++itM)
+ if (r0 == itM->second->a0 && r1 == itM->second->a1) return itM->second;
+ return NULL;
+ }
+};
+
+
+class genotype {
+public:
+ string id;
+ int idx;
+
+ vector < float > L;
+ vector < float > P;
+ vector < char > G;
+ vector < bool > h1;
+ vector < bool > h2;
+
+ //C/D
+ genotype(string & id, int idx){
+ this->id = id;
+ this->idx = idx;
+ }
+
+ ~genotype () {
+ L.clear(); P.clear(); G.clear(); h1.clear(); h2.clear();
+ }
+
+ int call(float threshold) {
+ int n = 0;
+ for (int l = 0 ; l < h1.size() ; l ++) {
+ G[l] = -1;
+ if (P[3 * l + 0] >= threshold) G[l] = 0;
+ if (P[3 * l + 1] >= threshold) G[l] = 1;
+ if (P[3 * l + 2] >= threshold) G[l] = 2;
+ if (G[l] >= 0) n ++;
+ }
+ return n;
+ }
+};
+
+class chunk {
+public:
+ string id;
+ vector < int > mappingS;
+ vector < vector < bool > > H;
+ vector < vector < float > > P;
+ vector < bool > switches;
+
+ chunk(char *) ;
+ ~chunk();
+
+ bool operator < (const chunk & c) const;
+
+ void assemble(chunk & pc);
+};
+
+class data {
+public:
+ vector < genotype * > G;
+ snp_set M;
+ vector < chunk * > C;
+
+ data();
+ ~data();
+
+ void readLikelihoods(char *);
+ void readPosteriors(char *);
+ void assemble();
+ void call(double);
+ void writeHaplotypes(char *);
+ void writeGenotypes(char *);
+};
+
+#endif
diff --git a/prepareGenFromBeagle4_modified20160601/old/data.cpp b/prepareGenFromBeagle4_modified20160601/old/data.cpp
new file mode 100644
index 0000000..ec532a6
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/old/data.cpp
@@ -0,0 +1,183 @@
+#include "classes.h"
+
+data::data() {
+}
+
+data::~data() {
+ for (int i = 0 ; i < G.size() ; i ++) delete G[i];
+ G.clear();
+ for (int i = 0 ; i < C.size() ; i ++) delete C[i];
+ C.clear();
+}
+
+void data::readLikelihoods (char * vcf) {
+ string buffer;
+ vector < string > tok, field, prob;
+
+ cout << "Reading likelihoods from [" << vcf << "]" << endl;
+ int line = 0;
+ ifile fd(vcf);
+ while(getline(fd, buffer, '\n')) {
+ tok = sutils::tokenize(buffer, "\t");
+ if (tok[0] == "#CHROM") {
+ for (int i = 9 ; i < tok.size() ; i ++) G.push_back(new genotype(tok[i], G.size()));
+ cout << " * #inds = " << G.size() << endl;
+ } else if (tok[0][0] != '#') {
+ M.push(new snp (atoi(tok[0].c_str()), tok[2], atoi(tok[1].c_str()), tok[3], tok[4], M.size()));
+ int idx_gl = -1;
+ field = sutils::tokenize(tok[8], ":");
+ for(int f = 0 ; f < field.size() && idx_gl < 0 ; f++) if (field[f] == "GL") idx_gl = f;
+ if (idx_gl < 0) { cout << "No GL field in VCF file at line = " << line << endl; exit(1); }
+ for (int i = 9 ; i < tok.size() ; i ++) {
+ field = sutils::tokenize(tok[i], ":");
+ prob = sutils::tokenize(field[idx_gl], ",");
+ double l0 = atof(prob[0].c_str());
+ double l1 = atof(prob[1].c_str());
+ double l2 = atof(prob[2].c_str());
+ G[i - 9]->L.push_back(l0);
+ G[i - 9]->L.push_back(l1);
+ G[i - 9]->L.push_back(l2);
+ G[i - 9]->P.push_back(0.0);
+ G[i - 9]->P.push_back(0.0);
+ G[i - 9]->P.push_back(0.0);
+ G[i - 9]->G.push_back(-1);
+ G[i - 9]->h1.push_back(false);
+ G[i - 9]->h2.push_back(false);
+ }
+ }
+
+ if (line % 1000 == 0) { cout << "\r" << line ;cout.flush();}
+ line ++;
+ }
+ cout << "\r * #sites = " << M.size() << endl;
+}
+
+void data::readPosteriors (char * vcf) {
+ string buffer;
+ vector < string > tok, field, prob;
+ chunk * c = new chunk(vcf);
+
+ cout << "Reading posteriors from [" << vcf << "]" << endl;
+ int line = 0;
+ ifile fd(vcf);
+ while(getline(fd, buffer, '\n')) {
+ sutils::tokenize(buffer, "\t");
+ if (tok[0] == "#CHROM") {
+ for (int i = 9 ; i < tok.size() ; i ++)
+ if (G[i-9]->id != tok[i]) { cout << "Sample inconsistency like=" << G[i-9]->id << " post=" << tok[i] << endl;exit(1); }
+ c->switches = vector < bool > (G.size(), false);
+ cout << " * All " << G.size() << " samples found" << endl;
+ } else if (tok[0][0] != '#') {
+ snp * s = M.get(atoi(tok[1].c_str()), tok[3], tok[4]);
+ if (s == NULL) { cout << "Site unfound in likelihoods file! pos = " << atoi(tok[1].c_str()) << endl; exit(1); }
+ c->mappingS.push_back(s->idx);
+ c->H.push_back(vector < bool > (2 * G.size(), false));
+ c->P.push_back(vector < float > (3 * G.size(), 0.0));
+ for (int i = 9 ; i < tok.size() ; i ++) {
+ field = sutils::tokenize(tok[i], ":");
+ prob = sutils::tokenize(field[2], ",");
+ c->H.back()[(i-9) * 2 + 0] = (tok[i][0] == '1');
+ c->H.back()[(i-9) * 2 + 1] = (tok[i][2] == '1');
+ c->P.back()[(i-9) * 3 + 0] = atof(prob[0].c_str());
+ c->P.back()[(i-9) * 3 + 1] = atof(prob[1].c_str());
+ c->P.back()[(i-9) * 3 + 2] = atof(prob[2].c_str());
+ }
+ }
+
+ if (line % 1000 == 0) { cout << "\r" << line ;cout.flush();}
+ line ++;
+ }
+ cout << "\r * #sites = " << c->mappingS.size() << endl;
+ C.push_back(c);
+}
+
+void data::assemble() {
+ cout << "Sorting the " << C.size() << " chunks of data" << endl;
+ sort(C.begin(), C.end());
+ for (int c = 1 ; c < C.size() ; c++) {
+ cout << "Assembling chunk " << c -1 << " witch chunk " << c << endl;
+ C[c]->assemble(*C[c-1]);
+ }
+
+ cout << "Copying chunk genotype & haplotype data" << endl;
+ for (int c = 0 ; c < C.size() ; c++) {
+ for (int l = 0 ; l < C[c]->mappingS.size() ; l ++) {
+ if (C[c]->mappingS[l] >= 0) {
+ //copying posteriors
+ for (int i = 0 ; i < G.size() ; i ++) {
+ G[i]->P[3 * C[c]->mappingS[l] + 0] = C[c]->P[l][3 * i + 0];
+ G[i]->P[3 * C[c]->mappingS[l] + 1] = C[c]->P[l][3 * i + 1];
+ G[i]->P[3 * C[c]->mappingS[l] + 2] = C[c]->P[l][3 * i + 2];
+ }
+ //copying haplotypes
+ for (int i = 0 ; i < G.size() ; i ++) {
+ if (!C[c]->switches[i]) {
+ G[i]->h1[C[c]->mappingS[l]] = C[c]->H[l][2 * i + 0];
+ G[i]->h2[C[c]->mappingS[l]] = C[c]->H[l][2 * i + 1];
+ } else {
+ G[i]->h1[C[c]->mappingS[l]] = C[c]->H[l][2 * i + 1];
+ G[i]->h2[C[c]->mappingS[l]] = C[c]->H[l][2 * i + 0];
+ }
+ }
+ }
+ }
+ }
+}
+
+void data::call(double threshold) {
+ int n_called = 0;
+ int n_total = 0;
+ cout << "Calling genotypes from posteriors" << endl;
+ for (int i = 0 ; i < G.size() ; i ++) {
+ n_called += G[i]->call(threshold);
+ n_total += M.size();
+ }
+ cout << " * #called = " << sutils::double2str(n_called * 100 / n_total, 1) << "%" << endl;
+}
+
+void data::writeHaplotypes(char * hap) {
+ string fhap = string(hap) + ".hap.gz";
+ string fsam = string(hap) + ".hap.sample";
+
+ cout << "Writing samples in [" << fsam << "]" << endl;
+ ofile fds (fhap);
+ fds << "ID_1 ID_2 missing" << endl << "0 0 0" << endl;
+ for (int i = 0 ; i < G.size() ; i ++) fds << G[i]->id << " " << G[i]->id << " 0" << endl;
+ fds.close();
+
+ cout << "Writing haplotypes in [" << fhap << "]" << endl;
+ ofile fdh (fsam);
+ for (int s = 0 ; s < M.size() ; s ++) {
+ fdh << M.vec_snp[s]->chr << " " << M.vec_snp[s]->id << " " << M.vec_snp[s]->pos << " " << M.vec_snp[s]->a0 << " " << M.vec_snp[s]->a1;
+ for (int i = 0 ; i < G.size() ; i ++) fdh << " " << G[i]->h1[s] << " " << G[i]->h2[s];
+ fdh << endl;
+ }
+ fdh.close();
+}
+
+void data::writeGenotypes(char * gen) {
+ string fgen = string(gen) + ".gen.gz";
+ string fsam = string(gen) + ".gen.sample";
+
+ cout << "Writing samples in [" << fsam << "]" << endl;
+ ofile fds (fsam);
+ fds << "ID_1 ID_2 missing" << endl << "0 0 0" << endl;
+ for (int i = 0 ; i < G.size() ; i ++) fds << G[i]->id << " " << G[i]->id << " 0" << endl;
+ fds.close();
+
+ cout << "Writing genotypes in [" << fgen << "]" << endl;
+ ofile fdg (fgen);
+ for (int s = 0 ; s < M.size() ; s ++) {
+ fdg << M.vec_snp[s]->chr << " " << M.vec_snp[s]->id << " " << M.vec_snp[s]->pos << " " << M.vec_snp[s]->a0 << " " << M.vec_snp[s]->a1;
+ for (int i = 0 ; i < G.size() ; i ++) {
+ switch (G[i]->G[s]) {
+ case 0: fdg << " 1 0 0"; break;
+ case 1: fdg << " 0 1 0"; break;
+ case 2: fdg << " 0 0 1"; break;
+ default: fdg << " " << G[i]->P[3 * i + 0] << " " << G[i]->P[3 * i + 1] << " " << G[i]->P[3 * i + 2]; break;
+ }
+ }
+ fdg << endl;
+ }
+ fdg.close();
+}
diff --git a/prepareGenFromBeagle4_modified20160601/old/prepareGenFromBeagle4.cpp b/prepareGenFromBeagle4_modified20160601/old/prepareGenFromBeagle4.cpp
new file mode 100644
index 0000000..eadad29
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/old/prepareGenFromBeagle4.cpp
@@ -0,0 +1,55 @@
+#include "classes.h"
+
+int main(int argc, char ** argv) {
+ string buffer;
+ vector < string > tok;
+
+ char * likelihoods = NULL;
+ vector < char * > posteriors;
+ char * haplotypes = NULL;
+ char * genotypes = NULL;
+ double threshold = 0.995;
+
+ //Reading parameters
+ for (int a =1 ; a < argc ; a++) {
+ if (strcmp(argv[a], "--likelihoods") == 0) {likelihoods = argv[a+1];}
+ if (strcmp(argv[a], "--haplotypes") == 0) {haplotypes = argv[a+1];}
+ if (strcmp(argv[a], "--genotypes") == 0) {genotypes = argv[a+1];}
+ if (strcmp(argv[a], "--threshold") == 0) {threshold= atoi(argv[a+1]);}
+ if (strcmp(argv[a], "--posteriors") == 0) {
+ int cpt = a + 1;
+ while (cpt >= 0) {
+ if (argv[cpt][0] == '-') cpt = -1;
+ else {
+ posteriors.push_back(argv[cpt]);
+ cpt ++;
+ }
+ }
+ }
+ }
+
+ //Checking parameters
+ if (likelihoods == NULL) { cout << "Missing --likelihoods arguments!" << endl; exit(1);}
+ else cout << "Likelihoods:\t[" << likelihoods << "]" << endl;
+
+ if (posteriors.size() == 0) { cout << "Missing --posteriors arguments!" << endl; exit(1);}
+ else {
+ cout << "Posteriors:\n" << endl;
+ for (int p = 0 ; p < posteriors.size() ; p++) cout << "\t[" << posteriors[p] << "]" << endl;
+ }
+
+ if (haplotypes == NULL) { cout << "Missing --haplotypes arguments!" << endl; exit(1);}
+ else cout << "Haplotypes:\t[" << haplotypes << "]" << endl;
+ if (genotypes == NULL) { cout << "Missing --haplotypes arguments!" << endl; exit(1);}
+ else cout << "Genotypes:\t[" << genotypes << "]" << endl;
+ cout << "Threshold:\t" << threshold << endl;
+
+ cout << endl;
+ data D;
+ D.readLikelihoods(likelihoods);
+ for (int p = 0 ; p < posteriors.size() ; p++) D.readPosteriors(posteriors[p]);
+ D.assemble();
+ D.call(threshold);
+ D.writeHaplotypes(haplotypes);
+ D.writeGenotypes(genotypes);
+}
diff --git a/prepareGenFromBeagle4_modified20160601/old/utils.cpp b/prepareGenFromBeagle4_modified20160601/old/utils.cpp
new file mode 100644
index 0000000..18c63b4
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/old/utils.cpp
@@ -0,0 +1,646 @@
+#include "utils.h"
+
+/******************************************************/
+/* UTILS STATISTICS */
+/******************************************************/
+long seed = -123456789;
+
+namespace putils {
+ void initRandom(long s) {
+ if (s == -1) seed = - time(NULL);
+ else seed = - s;
+ }
+
+ double getRandom() {
+ int j;
+ long k;
+ static long iy=0;
+ static long iv[NTAB];
+ double temp;
+ if (seed <= 0 || !iy) {
+ if (-(seed) < 1) seed=1;
+ else seed = -(seed);
+ for (j=NTAB+7;j>=0;j--) {
+ k=(seed)/IQ;
+ seed=IA*(seed-k*IQ)-IR*k;
+ if (seed < 0) seed += IM;
+ if (j < NTAB) iv[j] = seed;
+ }
+ iy=iv[0];
+ }
+ k=(seed)/IQ;
+ seed=IA*(seed-k*IQ)-IR*k;
+ if (seed < 0) seed += IM;
+ j=iy/NDIV;
+ iy=iv[j];
+ iv[j] = seed;
+ if ((temp=AM*iy) > RNMX) return RNMX;
+ else return temp;
+ }
+
+ long getSeed() {
+ return seed;
+ }
+
+ int getRandom(int n) {
+ return (int)floor(getRandom() * n);
+ }
+
+ void normalise(vector < double > & v) {
+ double sum = 0.0;
+ for (int i = 0 ; i < v.size() ; i++) sum += v[i];
+ if (sum != 0.0) for (int i = 0 ; i < v.size() ; i++) v[i] /= sum;
+ }
+
+ int sample(vector< double > & v, double sum) {
+ double csum = v[0];
+ double u = getRandom() * sum;
+ for (int i = 0; i < v.size() - 1; ++i) {
+ if ( u < csum ) return i;
+ csum += v[i+1];
+ }
+ return v.size() - 1;
+ }
+
+ double median (vector < double> & V) {
+ sort(V.begin(), V.end());
+ return V[V.size() / 2];
+ }
+
+ double mean (vector < double> & V) {
+ double m = 0.0;
+ for (int i = 0 ; i < V.size() ; i ++) m += V[i];
+ return m / V.size();
+ }
+};
+
+/******************************************************/
+/* UTILS ALGORITHM */
+/******************************************************/
+namespace autils {
+ int max(vector < double > & v) {
+ double max = -1e300;
+ int index_max = 0;
+ for (int i = 0 ; i < v.size() ; i ++)
+ if (v[i] > max) {
+ max = v[i];
+ index_max = i;
+ }
+ return index_max;
+ }
+
+ int max(vector < int > & v) {
+ int max = -1000000000;
+ int index_max = 0;
+ for (int i = 0 ; i < v.size() ; i ++)
+ if (v[i] > max) {
+ max = v[i];
+ index_max = i;
+ }
+ return index_max;
+ }
+
+ int count(vector < bool > &v) {
+ int c=0;
+ for (int i = 0 ; i < v.size() ; i ++) if (v[i]) c++;
+ return c;
+ }
+
+ void decompose(int min, vector < vector < int > > & B, vector < vector < vector < int > > > & BB) {
+ if (B.size() < 2 * min || B.size() == 2) {
+ BB.push_back(B);
+ return;
+ }
+
+ int l = putils::getRandom(B.size() - 2 * min) + min;
+ vector < vector < int > > LB = vector < vector < int > > (B.begin(), B.begin() + l + 1);
+ vector < vector < int > > RB = vector < vector < int > > (B.begin() + l - 1, B.end());
+ vector < vector < vector < int > > > LBB;
+ vector < vector < vector < int > > > RBB;
+ decompose(min, LB, LBB);
+ decompose(min, RB, RBB);
+ BB = LBB;
+ BB.insert(BB.end(), RBB.begin(), RBB.end());
+ }
+
+ int checkDuo (int pa1, int pa2, int ca1, int ca2) {
+ if (pa1 == 0 && pa2 == 0 && ca1 == 0 && ca2 == 0) { return 0; }
+ if (pa1 == 0 && pa2 == 0 && ca1 == 0 && ca2 == 1) { return 0; }
+ if (pa1 == 0 && pa2 == 0 && ca1 == 1 && ca2 == 0) { return 0; }
+ if (pa1 == 0 && pa2 == 0 && ca1 == 1 && ca2 == 1) { return -1; }
+ if (pa1 == 0 && pa2 == 1 && ca1 == 0 && ca2 == 0) { return 1; }
+ if (pa1 == 0 && pa2 == 1 && ca1 == 0 && ca2 == 1) { return 0; }
+ if (pa1 == 0 && pa2 == 1 && ca1 == 1 && ca2 == 0) { return 0; }
+ if (pa1 == 0 && pa2 == 1 && ca1 == 1 && ca2 == 1) { return 1; }
+ if (pa1 == 1 && pa2 == 0 && ca1 == 0 && ca2 == 0) { return 1; }
+ if (pa1 == 1 && pa2 == 0 && ca1 == 0 && ca2 == 1) { return 0; }
+ if (pa1 == 1 && pa2 == 0 && ca1 == 1 && ca2 == 0) { return 0; }
+ if (pa1 == 1 && pa2 == 0 && ca1 == 1 && ca2 == 1) { return 1; }
+ if (pa1 == 1 && pa2 == 1 && ca1 == 0 && ca2 == 0) { return -1; }
+ if (pa1 == 1 && pa2 == 1 && ca1 == 0 && ca2 == 1) { return 0; }
+ if (pa1 == 1 && pa2 == 1 && ca1 == 1 && ca2 == 0) { return 0; }
+ if (pa1 == 1 && pa2 == 1 && ca1 == 1 && ca2 == 1) { return 0; }
+ return 0;
+ }
+
+ int checkTrio (int fa1, int fa2, int ma1, int ma2, int ca1, int ca2) {
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 0; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 0 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return 2; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 2; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 2; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return 2; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 0 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return 2; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 2; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return 2; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return 2; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 1 && fa2 == 0 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 0; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 0; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 0 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 0 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 0 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 0 && ca1 == 1 && ca2 == 1 ) { return 1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 0 && ca2 == 1 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 0 ) { return -1; }
+ if (fa1 == 1 && fa2 == 1 && ma1 == 1 && ma2 == 1 && ca1 == 1 && ca2 == 1 ) { return 0; }
+ return 0;
+ }
+
+
+ int solveDuoF(int & fa1, int & fa2, int & ca1, int & ca2) {
+ int gf = fa1 + fa2;
+ int gc = ca1 + ca2;
+
+ if (gf == 0 && gc == 2) { return -1;};
+ if (gf == 2 && gc == 0) { return -1;};
+
+ if (gf == 0 && gc == 1) {fa1 = 0; fa2 = 0; ca1 = 0; ca2 = 1; return 1;};
+ if (gf == 1 && gc == 0) {fa1 = 0; fa2 = 1; ca1 = 0; ca2 = 0; return 1;};
+ if (gf == 1 && gc == 2) {fa1 = 1; fa2 = 0; ca1 = 1; ca2 = 1; return 1;};
+ if (gf == 2 && gc == 1) {fa1 = 1; fa2 = 1; ca1 = 1; ca2 = 0; return 1;};
+ return 0;
+ }
+
+ int solveDuoM(int & ma1, int & ma2, int & ca1, int & ca2) {
+ int gm = ma1 + ma2;
+ int gc = ca1 + ca2;
+
+ if (gm == 0 && gc == 2) { return -1; };
+ if (gm == 2 && gc == 0) { return -1; };
+
+ if (gm == 0 && gc == 1) {ma1 = 0; ma2 = 0; ca1 = 1; ca2 = 0; return 1;};
+ if (gm == 1 && gc == 0) {ma1 = 1; ma2 = 0; ca1 = 0; ca2 = 0; return 1;};
+ if (gm == 1 && gc == 2) {ma1 = 0; ma2 = 1; ca1 = 1; ca2 = 1; return 1;};
+ if (gm == 2 && gc == 1) {ma1 = 1; ma2 = 1; ca1 = 0; ca2 = 1; return 1;};
+ return 0;
+ }
+
+
+ int solveTrio (int & fa1, int & fa2, int & ma1, int & ma2, int & ca1, int & ca2) {
+ int gf = fa1 + fa2;
+ int gm = ma1 + ma2;
+ int gc = ca1 + ca2;
+
+ if (gf == 0 && gm == 0 && gc == 1) { return -1;};
+ if (gf == 0 && gm == 0 && gc == 2) { return -1;};
+ if (gf == 0 && gm == 1 && gc == 2) { return -1;};
+ if (gf == 0 && gm == 2 && gc == 0) { return -1;};
+ if (gf == 0 && gm == 2 && gc == 2) { return -1;};
+ if (gf == 1 && gm == 0 && gc == 2) { return -1;};
+ if (gf == 1 && gm == 2 && gc == 0) { return -1;};
+ if (gf == 2 && gm == 0 && gc == 0) { return -1;};
+ if (gf == 2 && gm == 0 && gc == 2) { return -1;};
+ if (gf == 2 && gm == 1 && gc == 0) { return -1;};
+ if (gf == 2 && gm == 2 && gc == 0) { return -1;};
+ if (gf == 2 && gm == 2 && gc == 1) { return -1;};
+
+ if (gf == 0 && gm == 1 && gc == 0) {fa1 = 0; fa2 = 0; ma1 = 1; ma2 = 0; ca1 = 0; ca2 = 0; return 1;};
+ if (gf == 0 && gm == 1 && gc == 1) {fa1 = 0; fa2 = 0; ma1 = 0; ma2 = 1; ca1 = 0; ca2 = 1; return 1;};
+ if (gf == 0 && gm == 2 && gc == 1) {fa1 = 0; fa2 = 0; ma1 = 1; ma2 = 1; ca1 = 0; ca2 = 1; return 1;};
+ if (gf == 1 && gm == 0 && gc == 0) {fa1 = 0; fa2 = 1; ma1 = 0; ma2 = 0; ca1 = 0; ca2 = 0; return 1;};
+ if (gf == 1 && gm == 0 && gc == 1) {fa1 = 1; fa2 = 0; ma1 = 0; ma2 = 0; ca1 = 1; ca2 = 0; return 1;};
+ if (gf == 1 && gm == 1 && gc == 0) {fa1 = 0; fa2 = 1; ma1 = 1; ma2 = 0; ca1 = 0; ca2 = 0; return 1;};
+ if (gf == 1 && gm == 1 && gc == 2) {fa1 = 1; fa2 = 0; ma1 = 0; ma2 = 1; ca1 = 1; ca2 = 1; return 1;};
+ if (gf == 1 && gm == 2 && gc == 1) {fa1 = 0; fa2 = 1; ma1 = 1; ma2 = 1; ca1 = 0; ca2 = 1; return 1;};
+ if (gf == 1 && gm == 2 && gc == 2) {fa1 = 1; fa2 = 0; ma1 = 1; ma2 = 1; ca1 = 1; ca2 = 1; return 1;};
+ if (gf == 2 && gm == 0 && gc == 1) {fa1 = 1; fa2 = 1; ma1 = 0; ma2 = 0; ca1 = 1; ca2 = 0; return 1;};
+ if (gf == 2 && gm == 1 && gc == 1) {fa1 = 1; fa2 = 1; ma1 = 1; ma2 = 0; ca1 = 1; ca2 = 0; return 1;};
+ if (gf == 2 && gm == 1 && gc == 2) {fa1 = 1; fa2 = 1; ma1 = 0; ma2 = 1; ca1 = 1; ca2 = 1; return 1;};
+ return 0;
+ }
+
+ int switch_error(vector < bool > & t1, vector < bool > & t2, vector < bool > & m12, vector < bool > & h1, vector < bool > & h2) {
+ int error = 0;
+
+ //cerr << utils::bool2str(t1) << endl;
+ //cerr << utils::bool2str(t2) << endl;
+ //cerr << utils::bool2str(h1) << endl;
+ //cerr << utils::bool2str(h2) << endl;
+
+ assert(t1.size() == h1.size());
+ assert(t1.size() == h2.size());
+ assert(t1.size() == t2.size());
+ assert(t1.size() == m12.size());
+
+ int locus = -1;
+
+ for (int l = 0 ; l < t1.size() && locus < 0 ; l ++) if (t1[l] != t2[l] && m12[l]) locus = l;
+
+ if (locus < 0) return -1;
+
+ bool swap = (t1[locus] == h2[locus]);
+ for (int l = locus + 1 ; l < t1.size() ; l ++)
+ if (m12[l]) {
+ if (!swap && t1[l] != h1[l]) {
+ swap = true;
+ error++;
+ } else if (swap && t1[l] != h2[l]) {
+ swap = false;
+ error++;
+ }
+ }
+
+ return error;
+ }
+
+};
+
+/******************************************************/
+/* UTILS STRING */
+/******************************************************/
+namespace sutils {
+ vector tokenize(string & str,string d) {
+ vector tokens;
+ string::size_type lastPos = str.find_first_not_of(d, 0);
+ string::size_type pos = str.find_first_of(d, lastPos);
+ while (string::npos != pos || string::npos != lastPos) {
+ tokens.push_back(str.substr(lastPos, pos - lastPos));
+ lastPos = str.find_first_not_of(d, pos);
+ pos = str.find_first_of(d, lastPos);
+ }
+ return tokens;
+ }
+
+ vector tokenize(string & str,string d, int n) {
+ vector tokens;
+ int cpt = 0;
+ string::size_type lastPos = str.find_first_not_of(d, 0);
+ string::size_type pos = str.find_first_of(d, lastPos);
+ while ((string::npos != pos || string::npos != lastPos) && cpt < n) {
+ tokens.push_back(str.substr(lastPos, pos - lastPos));
+ lastPos = str.find_first_not_of(d, pos);
+ pos = str.find_first_of(d, lastPos);
+ cpt++;
+ }
+ return tokens;
+ }
+
+ string int2str(int n) {
+ ostringstream s2( stringstream::out );
+ s2 << n;
+ return s2.str();
+ }
+
+ string int2str(vector < int > & v) {
+ ostringstream s2( stringstream::out );
+ for (int l = 0 ; l < v.size() ; l++) {
+ if (v[l] < 0) s2 << "-";
+ else s2 << v[l] ;
+ }
+ return s2.str();
+ }
+
+ string long2str(long int n) {
+ ostringstream s2( stringstream::out );
+ s2 << n;
+ return s2.str();
+ }
+
+ string double2str(double n, int prc) {
+ ostringstream s2;
+ s2 << setiosflags( ios::fixed );
+ if ( prc > 0 ) s2.precision(prc);
+ s2 << n;
+ return s2.str();
+ }
+
+ string double2str(vector < double > &v, int prc) {
+ ostringstream s2;
+ s2 << setiosflags( ios::fixed );
+ if ( prc >= 0 ) s2.precision(prc);
+ for (int l = 0 ; l < v.size() ; l++) {
+ s2 << v[l] << " ";
+ }
+ return s2.str();
+ }
+
+ string bool2str(vector & v) {
+ ostringstream s2( stringstream::out );
+ for (int l = 0 ; l < v.size() ; l++) {
+ if (v[l]) s2 << "1";
+ else s2 << "0";
+ }
+ return s2.str();
+ }
+
+ string date2str(time_t * t, string format) {
+ struct tm * timeinfo = localtime(t);
+ char buffer[128];
+ strftime(buffer, 128, format.c_str(), timeinfo);
+ ostringstream s2( stringstream::out );
+ s2 << buffer;
+ return s2.str();
+ }
+};
+
+/******************************************************/
+/* UTILS FILE */
+/******************************************************/
+namespace futils {
+ bool isFile(string f) {
+ ifstream inp;
+ inp.open(f.c_str(), ifstream::in);
+ if(inp.fail()) {
+ inp.clear(ios::failbit);
+ inp.close();
+ return false;
+ }
+ inp.close();
+ return true;
+ }
+
+ bool createFile(string f) {
+ ofstream out;
+ out.open(f.c_str(), ofstream::out);
+ if(out.fail()) {
+ out.clear(ios::failbit);
+ out.close();
+ return false;
+ }
+ out.close();
+ return true;
+ }
+
+ string extensionFile(string & filename) {
+ if (filename.find_last_of(".") != string::npos)
+ return filename.substr(filename.find_last_of(".") + 1);
+ return "";
+ }
+
+
+ void bool2binary(vector < bool > & V, ostream &fd) {
+ int nb = V.size();
+ fd.write((char*)&nb, sizeof(int));
+ int cpt_byte = 0;
+ int cpt_bin = 0;
+ int nb_byte = (int)ceil( (V.size() * 1.0) / 8);
+ while (cpt_byte < nb_byte) {
+ bitset<8> byte_bitset;
+ for (int l = 0; l < 8 && cpt_bin < V.size() ; l++) {
+ byte_bitset[l] = V[cpt_bin];
+ cpt_bin ++;
+ }
+ char byte_char = (char)byte_bitset.to_ulong();
+ fd.write(&byte_char, 1);
+ cpt_byte++;
+ }
+ }
+
+ bool binary2bool(vector < bool > & V, istream & fd) {
+ int nb;
+ fd.read((char*)&nb, sizeof(int));
+ if (!fd) return false;
+ int cpt_byte = 0;
+ int cpt_bin = 0;
+ int nb_byte = (int)ceil( (nb * 1.0) / 8);
+ V = vector < bool >(nb);
+ while (cpt_byte < nb_byte) {
+ char byte_char;
+ fd.read(&byte_char, 1);
+ if (!fd) return false;
+ bitset<8> byte_bitset = byte_char;
+ for (int l = 0; l < 8 && cpt_bin < nb ; l++) {
+ V[cpt_bin] = byte_bitset[l];
+ cpt_bin++;
+ }
+ cpt_byte++;
+ }
+ return true;
+ }
+};
+
+/******************************************************/
+/* INPUT FILE */
+/******************************************************/
+ifile::ifile() {
+}
+
+ifile::ifile(string filename , bool binary) {
+ open(filename, binary);
+}
+
+ifile::~ifile() {
+ close();
+}
+
+string ifile::name() {
+ return file;
+}
+
+bool ifile::open(string filename, bool binary) {
+ file = filename;
+ string ext = futils::extensionFile(filename);
+ if (ext == "gz") {
+ fd.open(file.c_str(), ios::in | ios::binary);
+ push(gzip_decompressor());
+ } else if (ext == "bz2") {
+ fd.open(file.c_str(), ios::in | ios::binary);
+ push(bzip2_decompressor());
+ } else if (binary) {
+ fd.open(file.c_str(), ios::in | ios::binary);
+ } else {
+ fd.open(file.c_str());
+ }
+ if (fd.fail()) return false;
+ push(fd);
+ return true;
+}
+
+void ifile::close() {
+ if (!empty()) reset();
+ fd.close();
+}
+
+/******************************************************/
+/* OUTPUT FILE */
+/******************************************************/
+ofile::ofile() {
+}
+
+ofile::ofile(string filename , bool binary) {
+ open(filename, binary);
+}
+
+ofile::~ofile() {
+ close();
+}
+
+string ofile::name() {
+ return file;
+}
+
+bool ofile::open(string filename, bool binary) {
+ file = filename;
+ string ext = futils::extensionFile(filename);
+ if (ext == "gz") {
+ fd.open(file.c_str(), ios::out | ios::binary);
+ push(gzip_compressor());
+ } else if (ext == "bz2") {
+ fd.open(file.c_str(), ios::out | ios::binary);
+ push(bzip2_compressor());
+ } else if (binary) {
+ fd.open(file.c_str(), ios::out | ios::binary);
+ } else {
+ fd.open(file.c_str());
+ }
+ if (fd.fail()) return false;
+ push(fd);
+ return true;
+}
+
+void ofile::close() {
+ if (!empty()) reset();
+ fd.close();
+}
+
+/******************************************************/
+/* LOG FILE */
+/******************************************************/
+lfile::lfile() {
+ verboseC = true;
+ verboseL = true;
+}
+
+lfile::~lfile() {
+ close();
+}
+
+string lfile::name() {
+ return file;
+}
+
+bool lfile::open(string filename) {
+ file = filename;
+ if (futils::extensionFile(file) != "log") file += ".log";
+ fd.open(file.c_str());
+ if (fd.fail()) return false;
+ return true;
+}
+
+void lfile::close() {
+ fd.close();
+}
+
+string lfile::getPrefix() {
+ return file.substr(0, file.find_last_of("."));
+}
+
+void lfile::muteL() {
+ verboseL = false;
+}
+
+void lfile::unmuteL() {
+ verboseL = true;
+}
+
+void lfile::muteC() {
+ verboseC = false;
+}
+
+void lfile::unmuteC() {
+ verboseC = true;
+}
+
+
+void lfile::print(string s) {
+ if (verboseL) { fd << s; fd.flush(); }
+ if (verboseC) { cerr << s; cerr.flush(); }
+}
+
+void lfile::printC(string s) {
+ if (verboseC) { cerr << s; cerr.flush(); }
+}
+
+void lfile::printL(string s) {
+ if (verboseL) { fd << s; fd.flush(); }
+}
+
+void lfile::println(string s) {
+ if (verboseL) { fd << s << endl; }
+ if (verboseC) { cerr << s << endl; }
+}
+
+void lfile::printlnC(string s) {
+ if (verboseC) { cerr << s << endl; }
+}
+
+void lfile::printlnL(string s) {
+ if (verboseL) { fd << s << endl; }
+}
+
+void lfile::warning(string s) {
+ cerr << "\33[33mWARNING:\33[0m " << s << endl;
+ if (verboseL) fd << "WARNING: " << s << endl;
+}
+
+void lfile::error(string s) {
+ cerr << "\33[33mERROR:\33[0m " << s << endl;
+ if (verboseL) fd << "ERROR: " << s << endl;
+ close();
+ exit(1);
+}
diff --git a/prepareGenFromBeagle4_modified20160601/old/utils.h b/prepareGenFromBeagle4_modified20160601/old/utils.h
new file mode 100644
index 0000000..6fdf61a
--- /dev/null
+++ b/prepareGenFromBeagle4_modified20160601/old/utils.h
@@ -0,0 +1,165 @@
+#ifndef _UTILS_H
+#define _UTILS_H
+
+#define IA 16807
+#define IM 2147483647
+#define AM (1.0/IM)
+#define IQ 127773
+#define IR 2836
+#define NTAB 32
+#define NDIV (1+(IM-1)/NTAB)
+#define EPS 1.2e-7
+#define RNMX (1.0-EPS)
+#define PI 3.14159265358979323846
+
+#include
+#include
+#include
+#include