-
Notifications
You must be signed in to change notification settings - Fork 0
/
s151Rfam-localminima-generator.py
56 lines (46 loc) · 1.66 KB
/
s151Rfam-localminima-generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import glob
import tempfile
import subprocess
import re
def GetS151Dataset():
dataset = []
for address in glob.glob("/home/czno-gen/partial_bpseq/*.bpseq"):
with open(address, "r") as f:
data = []
for line in f:
data.append(str(line).strip().split(" "))
sequence = ""
structure = ""
for d in data:
sequence += str(d[1])
if int(d[2]) == 0:
structure += "."
elif int(d[2]) >int(d[0]):
structure += "("
else:
structure += ")"
dataset.append([address, sequence, structure])
return sorted(dataset)
def GetLocalOptima(sequence):
result = []
with open(".tmp1",'w') as file:
file.write(sequence)
subprocess.call('RNAsubopt -p 10000 < "%s" > "%s"' % (".tmp1", ".tmp2"), shell=True)
subprocess.call('sed -e "1d" %s > "%s"' % (".tmp2", ".tmp3"), shell=True)
subprocess.call('RNAlocmin -s "%s" < "%s" > "%s"' % (".tmp1", ".tmp3", ".tmp4"), shell=True)
with open(".tmp4", "r") as f:
for line in f:
words = line.strip().split(" ")
if len(words) < 2: continue
if re.fullmatch(r"^[(.)]+$", words[1]) is None: continue
if len(words[1]) != len(sequence): continue
result.append(words[1])
return result
if __name__ == '__main__':
s151 = GetS151Dataset()
for data in s151:
print(data[1])
structures = GetLocalOptima(data[1])
for s in structures:
print(s)