-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #136 from freerkvandijk/master
Two scripts needed for RNA-seq phasing pipeline
- Loading branch information
Showing
21 changed files
with
2,849 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
<?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||
<?eclipse-pydev version="1.0"?><pydev_project> | ||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property> | ||
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property> | ||
</pydev_project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#!/bin/env python3 | ||
|
||
import argparse | ||
from Bio.bgzf import BgzfWriter | ||
import gzip | ||
import sys | ||
|
||
def main(): | ||
|
||
args = parse_args() | ||
with gzip.open(args.out, 'wt') as fd_out: | ||
for i_vcf, vcf in enumerate(args.vcf): | ||
with gzip.open(vcf, 'rt') as fd_vcf: | ||
## loop metadata lines and header line | ||
for line in fd_vcf: | ||
## end of metadata lines | ||
if line[0:2] != '##': | ||
## print header line | ||
print(line, end='', file=fd_out) | ||
## break loop at the header line | ||
break | ||
if i_vcf == 0: | ||
## print metadata line | ||
print(line, end='', file=fd_out) | ||
## loop records | ||
for line in fd_vcf: | ||
l = line.rstrip().split('\t') | ||
field = l[8].split(':') | ||
## index PL | ||
i_PL = l[8].split(':').index('PL') | ||
field.append('GL') | ||
GT = field.pop(0) | ||
field.sort() | ||
field.insert(0, GT) | ||
l[8] = ':'.join(field) | ||
## index GL | ||
i_GL = l[8].split(':').index('GL') | ||
s = '\t'.join(l[:9]) | ||
print(s, sep='\t', file=fd_out, end='') | ||
for i_GT in range(9, len(l)): | ||
lGT = l[i_GT] | ||
info = l[i_GT].split(':') | ||
if lGT == './.:0,0': | ||
toSplit = './.:0,0:0' | ||
info = toSplit.split(':') | ||
if info[0] == './.': | ||
s_GL = '.,.,.' | ||
else: | ||
s_PL = l[i_GT].split(':')[i_PL] | ||
if s_PL == '.': | ||
s_GL = '.,.,.' | ||
else: | ||
s_GL = ','.join( | ||
str(-float(PL)/10) for PL in s_PL.split(',')) | ||
if s_GL==".": | ||
print(line) | ||
exit() | ||
info.insert(i_GL, s_GL) | ||
l[i_GT] = info | ||
print('\t' + | ||
':'.join(l[i_GT]), sep='\t', end='', | ||
file=fd_out) | ||
print(end='\n', file=fd_out) | ||
|
||
return | ||
|
||
|
||
def parse_args(): | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument('--vcf', nargs = '+') | ||
parser.add_argument('--out', required=True) | ||
args = parser.parse_args() | ||
|
||
return args | ||
|
||
if __name__ == '__main__': | ||
main() |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
#compiler | ||
CXX=g++ | ||
|
||
#compiler flags | ||
COPTI= -O3 | ||
CDEBG= -g | ||
CWARN= -Wall -Wextra -Wno-sign-compare | ||
CBAMH= -D_LIBBAM | ||
CSHV1= | ||
|
||
#linker flags | ||
LOPTI= -O3 | ||
LDEBG= -g | ||
LSTDD= -lm -lpthread -lboost_iostreams -lboost_program_options | ||
LSTDS= -Wl,-Bstatic -lboost_iostreams -lz -lbz2 -Wl,-Bdynamic -lm -lpthread | ||
LCL3S= -Wl,-Bstatic -lz -lbz2 -Wl,-Bdynamic -lm -lpthread | ||
LBAMD= -lbam -lz | ||
|
||
#executable file | ||
EFILE= bin/prepareGenFromBeagle4 | ||
|
||
#header files | ||
HFILE= $(shell find src -name *.h) | ||
|
||
#source files | ||
CFILE= $(shell find src -name *.cpp) | ||
|
||
#source path | ||
VPATH= $(shell for file in `find src -name *.cpp`; do echo $$(dirname $$file); done) | ||
|
||
#include path | ||
ISTDP= -Isrc | ||
IBAMP= -Ilib | ||
#ICL3P= -I/apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/include | ||
#ICL3P= -I$EBROOTBOOST/include | ||
ICL3P= -I/apps/software/Boost/1.60.0-foss-2015b-Python-2.7.9/include/ | ||
|
||
#library path | ||
LSTDP= -Llib | ||
|
||
#object files | ||
OFILE= $(shell for file in `find src -name *.cpp`; do echo obj/$$(basename $$file .cpp).o; done) | ||
OBOST= | ||
|
||
#default | ||
all: cluster | ||
|
||
#dynamic release | ||
dynamic: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
dynamic: LFLAG=$(LOPTI) $(LSTDD) | ||
dynamic: IFLAG=$(ISTDP) | ||
dynamic: $(EFILE) | ||
|
||
#debug release | ||
debug: CFLAG=$(CDEBG) $(CWARN) $(CSHV1) | ||
debug: LFLAG=$(LDEBG) $(LSTDD) | ||
debug: IFLAG=$(ISTDP) | ||
debug: $(EFILE) | ||
|
||
#static release | ||
static: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
static: LFLAG=$(LOPTI) $(LSTDS) | ||
static: IFLAG=$(ISTDP) | ||
static: $(EFILE) | ||
|
||
#cluster release | ||
cluster: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
cluster: LFLAG=$(LOPTI) $(LCL3S) | ||
cluster: IFLAG=$(ISTDP) $(ICL3P) | ||
#cluster: OBOST=/apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/lib/libboost_iostreams.a /apps/software/Boost/1.57.0-goolf-1.7.20-Python-2.7.9/lib/libboost_program_options.a | ||
OBOST=-lz /apps/software/Boost/1.60.0-foss-2015b-Python-2.7.9/lib/libboost_iostreams.a | ||
cluster: $(EFILE) | ||
|
||
$(EFILE): $(OFILE) | ||
$(CXX) $^ $(OBOST) -o $@ $(LFLAG) | ||
|
||
obj/%.o: %.cpp $(HFILE) | ||
$(CXX) -o $@ -c $< $(CFLAG) $(IFLAG) | ||
|
||
clean: | ||
rm -f obj/*.o $(EFILE) | ||
|
||
test: | ||
cp $(EFILE) ~/$(EFILE).v10 | ||
|
||
oxford: | ||
cp $(EFILE) ~/bin/. | ||
|
||
install: | ||
cp $(EFILE) ./tmp/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
#compiler | ||
CXX=g++ | ||
|
||
#compiler flags | ||
COPTI= -O3 | ||
CDEBG= -g | ||
CWARN= -Wall -Wextra -Wno-sign-compare | ||
CBAMH= -D_LIBBAM | ||
CSHV1= | ||
|
||
#linker flags | ||
LOPTI= -O3 | ||
LDEBG= -g | ||
LSTDD= -lm -lpthread -lboost_iostreams -lboost_program_options | ||
LSTDS= -Wl,-Bstatic -lboost_iostreams -lz -lbz2 -Wl,-Bdynamic -lm -lpthread | ||
LCL3S= -Wl,-Bstatic -lz -lbz2 -Wl,-Bdynamic -lm -lpthread | ||
LBAMD= -lbam -lz | ||
|
||
#executable file | ||
EFILE= bin/prepareGenFromBeagle4 | ||
|
||
#header files | ||
HFILE= $(shell find src -name *.h) | ||
|
||
#source files | ||
CFILE= $(shell find src -name *.cpp) | ||
|
||
#source path | ||
VPATH= $(shell for file in `find src -name *.cpp`; do echo $$(dirname $$file); done) | ||
|
||
#include path | ||
ISTDP= -Isrc | ||
IBAMP= -Ilib | ||
ICL3P= -I/users/delaneau/BOOST/include | ||
|
||
#library path | ||
LSTDP= -Llib | ||
|
||
#object files | ||
OFILE= $(shell for file in `find src -name *.cpp`; do echo obj/$$(basename $$file .cpp).o; done) | ||
OBOST= | ||
|
||
#default | ||
all: dynamic | ||
|
||
#dynamic release | ||
dynamic: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
dynamic: LFLAG=$(LOPTI) $(LSTDD) | ||
dynamic: IFLAG=$(ISTDP) | ||
dynamic: $(EFILE) | ||
|
||
#debug release | ||
debug: CFLAG=$(CDEBG) $(CWARN) $(CSHV1) | ||
debug: LFLAG=$(LDEBG) $(LSTDD) | ||
debug: IFLAG=$(ISTDP) | ||
debug: $(EFILE) | ||
|
||
#static release | ||
static: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
static: LFLAG=$(LOPTI) $(LSTDS) | ||
static: IFLAG=$(ISTDP) | ||
static: $(EFILE) | ||
|
||
#cluster release | ||
cluster: CFLAG=$(COPTI) $(CWARN) $(CSHV1) | ||
cluster: LFLAG=$(LOPTI) $(LCL3S) | ||
cluster: IFLAG=$(ISTDP) $(ICL3P) | ||
cluster: OBOST=~/BOOST/lib/libboost_iostreams.a ~/BOOST/lib/libboost_program_options.a | ||
cluster: $(EFILE) | ||
|
||
$(EFILE): $(OFILE) | ||
$(CXX) $^ $(OBOST) -o $@ $(LFLAG) | ||
|
||
obj/%.o: %.cpp $(HFILE) | ||
$(CXX) -o $@ -c $< $(CFLAG) $(IFLAG) | ||
|
||
clean: | ||
rm -f obj/*.o $(EFILE) | ||
|
||
test: | ||
cp $(EFILE) ~/$(EFILE).v10 | ||
|
||
oxford: | ||
cp $(EFILE) ~/bin/. | ||
|
||
install: | ||
cp $(EFILE) /usr/local/bin/. |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#include "classes.h" | ||
|
||
|
||
int hamming (vector < bool > & h1, vector < bool > & h2) { | ||
assert(h1.size() == h2.size()); | ||
int c = 0; | ||
for(int l = 0 ; l < h1.size(); l++) if (h1[l] != h2[l]) c++; | ||
return c; | ||
} | ||
|
||
chunk::chunk(char * id) { | ||
this->id = string(id); | ||
} | ||
|
||
chunk::~chunk() { | ||
mappingS.clear(); | ||
H.clear(); | ||
P.clear(); | ||
switches.clear(); | ||
} | ||
|
||
bool chunk::operator < (const chunk & c) const { | ||
return (mappingS[0] < c.mappingS[0]); | ||
} | ||
|
||
void chunk::assemble(chunk & pc) { | ||
//mapping starting point of the overlap | ||
int b1 = -1; | ||
int e1 = pc.mappingS.size() - 1; | ||
for (int l = 0 ; l < pc.mappingS.size() && b1 < 0 ; l ++ ) if (pc.mappingS[l] == mappingS[0]) b1 = l; | ||
if (b1 < 0) {cout << "Chunk [" << id << "] does not overlap chunk [" << pc.id << "], code 1" << endl; exit(1);} | ||
|
||
//mapping starting point of the overlap | ||
int b2 = 0; | ||
int e2 = -1; | ||
for (int l = 0 ; l < mappingS.size() && e2 < 0 ; l ++ ) if (mappingS[l] == pc.mappingS.back()) e2 = l; | ||
if (e2 < 0) {cout << "Chunk [" << id << "] does not overlap chunk [" << pc.id << "], code 2" << endl; exit(1);} | ||
|
||
//overlap size | ||
int osize = e1 - b1 + 1; | ||
int osize2 = e2 - b2 + 1; | ||
if (osize != osize2) {cout << "Overlaps are differents between [" << id << "] does not overlap chunk [" << pc.id << "] o1=" << osize << " o2=" << osize2 <<endl; exit(1);} | ||
else cout << " * overlap=" << osize << endl; | ||
|
||
//blanking posteriors | ||
int cpt = 0; | ||
int i1, i2; | ||
for (i1 = b1, i2 = b2; i1 <= e1 && i2 <= e2; i1++, i2 ++) { | ||
if (cpt < (osize / 2)) mappingS[i2] = -1; | ||
else pc.mappingS[i1] = -1; | ||
} | ||
|
||
//switching haplotypes | ||
int n_switches = 0; | ||
for (int i = 0 ; i < switches.size() ; i ++) { | ||
vector < bool > h11 = vector < bool > (pc.H[2 * i + 0].begin() + b1, pc.H[2 * i + 0].end()); | ||
vector < bool > h12 = vector < bool > (pc.H[2 * i + 1].begin() + b1, pc.H[2 * i + 1].end()); | ||
vector < bool > h21 = vector < bool > (H[2 * i + 0].begin(), pc.H[2 * i + 0].begin() + e2 + 1); | ||
vector < bool > h22 = vector < bool > (H[2 * i + 1].begin(), pc.H[2 * i + 1].begin() + e2 + 1); | ||
int score0 = 0, score1 = 0; | ||
if (!pc.switches[i]) { | ||
score0 = hamming(h11, h21) + hamming(h12, h22); | ||
score1 = hamming(h11, h22) + hamming(h12, h21); | ||
} else { | ||
score0 = hamming(h12, h21) + hamming(h11, h22); | ||
score1 = hamming(h12, h22) + hamming(h11, h21); | ||
} | ||
|
||
if (score0 <= score1) switches[i] = false; | ||
else { | ||
switches[i] = true; | ||
n_switches ++; | ||
} | ||
} | ||
cout << " * #switches=" <<n_switches << " out of " << switches.size() << " possible" << endl; | ||
} |
Oops, something went wrong.