-
Notifications
You must be signed in to change notification settings - Fork 0
/
PairSNPDist.py
40 lines (31 loc) · 1.39 KB
/
PairSNPDist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from Bio import SeqIO
import argparse
import pandas as pd
import scipy as sp
def _str2bool(value):
    """Convert a command-line token such as "true", "No" or "1" to a bool.

    Raises argparse.ArgumentTypeError for tokens that are not recognisable
    as a boolean, so argparse reports a proper usage error.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, as it was with the former ``type=bool``.
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got %r" % value)


def options(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings. When None (the default),
            argparse reads the arguments from ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with the attributes:
            alnfile   -- open read handle of the FASTA alignment, or None
            groupfile -- open read handle of the tabbed group file, or None
            ignoregap -- bool, False unless requested on the command line
    """
    # Adjacent literals instead of a backslash continuation inside the
    # string: the continuation either fused the two words or embedded the
    # next line's indentation into the help text.
    parser = argparse.ArgumentParser(
        description="Calculates Paired SNPs Distribution. Please change the code "
                    "based on your requirement first")
    parser.add_argument("--alnfile", dest="alnfile",
                        help="Fasta Alignment File",
                        type=argparse.FileType("r"))
    parser.add_argument("--groupfile", dest="groupfile",
                        help="Tabbed group file",
                        type=argparse.FileType("r"))
    # ``type=bool`` would turn every non-empty string (including "False")
    # into True. Parse the text explicitly, and let a bare ``--ignoregap``
    # act as a switch via nargs="?" / const=True.
    parser.add_argument("--ignoregap", dest="ignoregap",
                        help="Remove gap from the analysis",
                        type=_str2bool, nargs="?", const=True, default=False)
    return parser.parse_args(argv)
# Script entry point: parse the command-line options, load the FASTA
# alignment into a DataFrame and (eventually) compare two groups of
# sequences. The comparison itself is not implemented yet -- see the TODO
# list further down.
# NOTE(review): this block does not run as written. The leading indentation
# is missing in this copy of the file, `group[]` is not valid Python, and
# `group` / `significat` are never defined in the visible code.
if __name__=='__main__':
args = options( )
# Maps each record id to the list of its upper-cased sequence characters.
sequences = {}
# Assigned here but not used anywhere in the visible code.
distribution = {}
for rec in SeqIO.parse(args.alnfile,"fasta"):
sequences[rec.id] = list(str(rec.seq).upper( ))# Change this line to suit your needs
# With from_dict's default orientation each record id becomes one column.
sequences = pd.DataFrame.from_dict(sequences)
# NOTE(review): DataFrame.from_csv was removed in pandas 1.0, and it is
# given the file *contents* here rather than a path or file handle --
# presumably pd.read_csv(args.groupfile, sep="\t") is intended; confirm
# against the actual layout of the group file.
groups = pd.DataFrame.from_csv(args.groupfile.read( ))
# NOTE(review): placeholders -- `group[]` is a syntax error and only `groups`
# is defined above. Presumably these should select the columns (record ids)
# that belong to each of the two groups; TODO confirm.
sequence1 = sequences[[group[]]]
sequence2 = sequences[[group[]]]
# TODO: comparison step
# TODO: convert the result to a data frame
# TODO: do a t-test on the fly and produce the files only for pairs with a
# significant difference
# TODO: add an option to ignore gaps and see the difference in the result
# TODO: check for the consistency of the gaps: at the beginning or at the
# end of the alignment
#
# NOTE(review): `significat` is not defined in the visible code, and both
# from_dict( ) and to_csv( ) are called without their data / output path --
# this is an unfinished stub.
if significat:
pd.DataFrame.from_dict( ).to_csv( )