-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdbtext.py
executable file
·120 lines (94 loc) · 2.94 KB
/
pdbtext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# encoding: utf-8
__doc__ = """
Text manipulation of PDB protein structure files.
This is a utility module providing common operations
applied to PDB files that can easily be done with text
processing.
"""
import data
def strip_lines(pdb_txt, tag_func):
    """
    Return pdb_txt with every line that tag_func flags removed.

    pdb_txt is split with str.splitlines(), so tag_func receives each
    line without its line terminator. A line is dropped when
    tag_func(line) is truthy. The surviving lines are joined with a
    single newline character, so the result never ends with a
    trailing newline.
    """
    survivors = [row for row in pdb_txt.splitlines() if not tag_func(row)]
    return '\n'.join(survivors)
def strip_hydrogens(pdb_txt):
    """
    Return pdb_txt without the ATOM records that describe hydrogens.

    A line is treated as a hydrogen when it starts with "ATOM" and the
    first character of its atom-name field (line[12:16]) that is
    neither a digit nor a space is "H". Names such as " H  ", "1HB "
    and "HD21" therefore match, while " CA " or " N  " do not.

    Every other line, including HETATM records, is passed through
    unchanged. An ATOM line whose atom-name field is missing, blank or
    all digits is kept rather than raising IndexError. The result is
    joined with single newline characters and has no trailing newline.
    """

    def is_hydrogen(atom_name):
        # Skip leading position digits and padding; the first remaining
        # character decides the element.
        for ch in atom_name:
            if ch.isdigit() or ch == " ":
                continue
            return ch == "H"
        # Nothing but digits/spaces (or a truncated line): not a hydrogen.
        return False

    kept = []
    for line in pdb_txt.splitlines():
        if line.startswith("ATOM") and is_hydrogen(line[12:16]):
            continue
        kept.append(line)
    return '\n'.join(kept)
def strip_solvent(pdb_txt):
    """
    Return pdb_txt without the lines that belong to solvent residues.

    For every line (not only atom records) the residue-name slice
    line[17:20] is stripped of whitespace, upper-cased and looked up
    in data.solvent_res_types; lines whose name is found there are
    dropped. The remaining lines are joined with single newline
    characters, so the result has no trailing newline.
    """

    def is_solvent(row):
        # Residue name as it appears in the fixed-width slice.
        return row[17:20].strip().upper() in data.solvent_res_types

    return '\n'.join(
        [row for row in pdb_txt.splitlines() if not is_solvent(row)])
def renumber_residues(pdb_txt):
    """
    Return pdb_txt with residues renumbered consecutively from 1.

    A residue is identified by the slice line[17:27] of its records.
    Residues are numbered in order of first appearance among the ATOM
    and HETATM lines. For every ATOM, ANISOU and HETATM line the five
    characters line[22:27] are replaced with the new number formatted
    as '%4d ' (the number modulo 10000, followed by a space), so the
    character at index 26 always becomes a blank. All other lines are
    copied unchanged.

    Unlike the other helpers in this module, every output line is
    terminated with a newline character.

    Raises KeyError if an ANISOU line refers to a residue that has no
    ATOM or HETATM line, because only those two record types register
    residues.
    """
    lines = pdb_txt.splitlines()

    # Pass 1: assign each residue tag its 1-based order of appearance.
    # A dict gives constant-time "seen before?" checks.
    new_resnum = {}
    for line in lines:
        if line.startswith(('ATOM', 'HETATM')):
            tag = line[17:27]
            if tag not in new_resnum:
                new_resnum[tag] = len(new_resnum) + 1

    # Pass 2: rewrite the residue-number field of the coordinate records.
    renumbered = []
    for line in lines:
        if line.startswith(('ATOM', 'ANISOU', 'HETATM')):
            # Wrap at 10000 so the number always fits the 4-char field.
            field = '%4d ' % (new_resnum[line[17:27]] % 10000)
            line = line[:22] + field + line[27:]
        renumbered.append(line)

    return ''.join(line + '\n' for line in renumbered)
def strip_other_nmr_models(pdb_txt):
    """
    Return pdb_txt truncated after its first ENDMDL line.

    Everything up to and including the first line that starts with
    "ENDMDL" is kept; every later line is discarded. Text without an
    ENDMDL line is returned in full. Lines are joined with single
    newline characters, so the result has no trailing newline.
    """
    rows = pdb_txt.splitlines()
    for position, row in enumerate(rows):
        if row.startswith("ENDMDL"):
            # Keep the ENDMDL line itself, drop whatever follows it.
            return '\n'.join(rows[:position + 1])
    return '\n'.join(rows)
def strip_alternative_atoms(pdb_txt):
    """
    Return pdb_txt keeping a single alternate location per ATOM record.

    The alternate-location character of an ATOM line is line[16]:
      - ' '        : the line is kept unchanged;
      - 'A' or 'a' : the line is kept with that character blanked;
      - otherwise  : the line is dropped.

    Lines that do not start with "ATOM" (HETATM included) are passed
    through unchanged. An ATOM line too short to carry the character
    is also kept unchanged rather than raising IndexError. The result
    is joined with single newline characters and has no trailing
    newline.
    """
    kept = []
    for line in pdb_txt.splitlines():
        if line.startswith('ATOM'):
            # Slice, not index: yields '' on a truncated line.
            alt_loc = line[16:17]
            if alt_loc in ('A', 'a'):
                line = line[:16] + ' ' + line[17:]
            elif alt_loc not in ('', ' '):
                continue
        kept.append(line)
    return '\n'.join(kept)
def clean_pdb(in_pdb, out_pdb):
    """
    Read the PDB file in_pdb, clean it, and write the result to out_pdb.

    The cleaning steps are applied in this order:
      1. strip_other_nmr_models: drop everything after the first ENDMDL;
      2. strip_lines: drop HETATM, ANISOU, CONECT and MASTER lines;
      3. strip_solvent: drop solvent residues;
      4. strip_alternative_atoms: keep one alternate location;
      5. strip_hydrogens: drop hydrogen ATOM records;
      6. renumber_residues: renumber residues from 1.

    Both files are opened through context managers so their handles
    are closed even if a step raises.
    """
    with open(in_pdb, 'r') as in_file:
        txt = in_file.read()

    unwanted_records = ('HETATM', 'ANISOU', 'CONECT', 'MASTER')

    txt = strip_other_nmr_models(txt)
    # One pass with a tuple of prefixes removes the same lines as four
    # separate single-prefix passes.
    txt = strip_lines(txt, lambda l: l.startswith(unwanted_records))
    txt = strip_solvent(txt)
    txt = strip_alternative_atoms(txt)
    txt = strip_hydrogens(txt)
    txt = renumber_residues(txt)

    with open(out_pdb, 'w') as out_file:
        out_file.write(txt)