forked from getcontacts/getcontacts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcontacts_to_flare.py
executable file
·324 lines (270 loc) · 12 KB
/
contacts_to_flare.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#!/usr/bin/env python3
"""
Takes a list of atomic contacts as input and generates a json representing a
temporal flare which can be visualized using flareplots.
A subset of interaction types can be selected using the --itype argument which
is formatted as a comma-separated list of abbreviations corresponding to
sb salt bridges
pc pi-cation
ps pi-stacking
ts t-stacking
vdw van der Waals
hb hydrogen bonds
lhb ligand hydrogen bonds
hbbb backbone-backbone hydrogen bonds
hbsb backbone-sidechain hydrogen bonds
hbss sidechain-sidechain hydrogen bonds
wb water bridges
wb2 extended water bridges
hls ligand-sidechain residue hydrogen bonds
hlb ligand-backbone residue hydrogen bonds
lwb ligand water bridges
lwb2 extended ligand water bridges
By default, the labels on the plot will reflect the residue identifier.
Optionally, a "flare-label" file can be supplied which indicates how residue
identifiers should be translated to flare-plot labels. The flare-label file is
a tab-separated text-file where each line has one field that indicates a colon-
separated residue identifier and one field that indicates the corresponding
flareplot label. Dots in the flareplot labels can be used to group and organize
labels. A valid flare-label file would look like this:
A:ARG:4 Root.Helix1.1x30
A:LYS:5 Root.Helix1.1x31
...
A:PRO:45 Root.Helix2.2x36
A:MET:46 Root.Helix2.2x37
...
The flare-label file can also act as a filter, as interactions between residues
that are not included in the file will be excluded from the plot. For
convenience it's not necessary to include the second column if the label file
is just used as a filter. A third column can be supplied indicating a color in
CSS-format (e.g. '#FF0000' or 'red').
"""
def main():
"""
Main function called once at the end of this module. Configures and parses command line arguments, parses input
files and generates output files.
"""
# Parse command line arguments
import argparse as ap
parser = ap.ArgumentParser(description=__doc__, formatter_class=ap.RawTextHelpFormatter)
optional = parser._action_groups.pop()
required = parser.add_argument_group('required arguments')
parser._action_groups.append(optional) # added this line
required.add_argument('--input',
required=True,
type=ap.FileType('r'),
help='A multi-frame contact-file generated by dynamic_contact.py')
required.add_argument('--output',
required=False,
type=ap.FileType('w'),
help='The json file to write flare to')
optional.add_argument('--itype',
required=False,
default="all",
type=str,
help='Interaction types to include (comma separated list) [default: all]')
optional.add_argument('--flarelabels',
required=False,
default=None,
type=ap.FileType('r'),
help='Flare-label file')
args = parser.parse_args()
if args.output:
print("Parsing %s contacts from %s" % (args.itype, args.input.name))
# Read contacts and generate graph
itypes = parse_itypes(args.itype)
contacts = parse_contacts(args.input, itypes)
labels = parse_flarelabels(args.flarelabels)
graph = create_graph(contacts, labels)
# Convert string to pretty printed JSON
import json
pretty_json = json.dumps(graph, indent=2)
# "frames" entries can contain a lot of digits so put those on a single line
import re
pretty_json = re.sub(r"(?<=\d,)\n *|(?<=\[)\n *(?=\d)|(?<=\d)\n *(?=\])", "", pretty_json, flags=re.MULTILINE)
# Write to output file
if args.output:
args.output.write(pretty_json)
args.output.close()
print("Done - wrote flare-json to %s" % args.output.name)
else:
print(pretty_json)
def parse_contacts(contact_file, itypes):
"""
Parses the contact file and returns it a list of atomic contacts. Atom strings are converted to tuples by splitting
on ":".
Parameters
----------
contact_file: file
Contact-file generated by dynamic_contacts.py
itypes: set of str
A set of interaction types to retain.
Returns
-------
list of tuples of (str, str, tuple, tuple [[, tuple], tuple])
Each entry contains a column of the contact_file, e.g.
`[("0", "hbbb", ("A", "ARG", "4", "H"), ("A", "PHE", "22", "O")), (..) ]`
"""
def parse_atom(atom_str):
atom_tokens = atom_str.split(":")
return tuple(atom_tokens)
ret = []
for line in contact_file:
line = line.strip()
if not line or line[0] == "#":
continue # Ignore empty and commented lines
# Check number of columns is correct and skip if interaction type is not specified
columns = line.split("\t")
if not len(columns) in range(4, 7):
raise AssertionError("Invalid interaction line: '"+line+"'")
if not columns[1] in itypes:
continue
# Parse atoms
columns[2] = parse_atom(columns[2])
columns[3] = parse_atom(columns[3])
if len(columns) >= 5:
columns[4] = parse_atom(columns[4])
if len(columns) == 6:
columns[5] = parse_atom(columns[5])
ret.append(tuple(columns))
contact_file.close()
return ret
def parse_flarelabels(label_file):
"""
Parses a flare-label file and generates a dictionary mapping residue identifiers (e.g. A:ARG:123) to a
user-specified label, trees that can be parsed by flareplots, and a color indicator for vertices.
Parameters
----------
label_file : file
A flare-label file where each line contains 2-3 columns formatted as
- CHAIN:RESN:RESI (e.g. A:ARG:123)
- [[TOPLEVEL.]MIDLEVEL.]LABEL (e.g. Receptor.Helix2.2x44)
- COLOR (e.g. #FF0000 or white)
Returns
-------
dict of str : (dict of str : str)
Keys are all residue identifiers and values are dicts that hold both the LABEL by itself (key "label", the full
tree-path (key "treepath") and a CSS-compatible color string (key "color").
Raises
------
AssertionError
if a residue identifier (CHAIN:RESN:RESI) is specified twice in the file, or if a LABEL appears twice.
"""
if label_file is None:
return None
ret = {}
flarelabels = set() # Only used to check for duplicates
for line in label_file:
line = line.strip()
if not line:
continue # Ignore empty lines
columns = line.split("\t")
residentifier = columns[0]
flaretreepath = columns[1] if len(columns) > 1 else columns[0]
flarelabel = flaretreepath.split(".")[-1]
flarecolor = columns[2] if len(columns) > 2 else "white"
if residentifier in ret:
raise AssertionError("Residue identifier '"+residentifier+"' appears twice in "+label_file.name)
if flarelabel in flarelabels:
raise AssertionError("Flare label '"+flarelabel+"' used twice in "+label_file.name)
ret[residentifier] = {"label": flarelabel, "treepath": flaretreepath, "color": flarecolor}
flarelabels.add(flarelabel)
return ret
def parse_itypes(itype_argument):
"""Parses the itype argument and returns a set of strings with all the selected interaction types """
if "all" in itype_argument:
return ["sb", "pc", "ps", "ts", "vdw", "hb", "lhb", "hbbb", "hbsb",
"hbss", "wb", "wb2", "hls", "hlb", "lwb", "lwb2"]
return set(itype_argument.split(","))
def create_graph(contacts, resi_labels):
"""
Creates a graph from the contacts and residue labels formatted as a dict that can be trivially dumped to a json that
can be read by flareplot. An "edge" key mapping to a list of edge specifiers with "name1", "name2", and "frames"
attributes will always be generated and if `resi_labels` isn't `None` then the "trees" and "tracks" will be
generated as well.
Parameters
----------
contacts : list of tuples of (str, str, tuple, tuple [[, tuple], tuple])
Each entry specifies a frame-number, an interaction type, and 2 to 4 atom-tuples depending on the interaction
type. Water mediated and water-water mediated interactions will have waters in the third and fourth tuples.
resi_labels : dict of (str : dict of (str : str))
Each key is a residue identifier and the associated value is a dictionary with the label, tree-path, and color
that are consistent with the format of flareplots.
Returns
-------
dict of str : list
The types of the list contents varies depending on the key, but the format corresponds to the specifications of
jsons used as input for flareplot. For example
>>> {
>>> "edges": [
>>> {"name1": "ARG1", "name2": "ALA3", "frames": [0,4,10]},
>>> {"name1": "ALA3", "name2": "THR2", "frames": [1,2]}
>>> ],
>>> "trees": [
>>> {"treeName": "DefaultTree", "treePaths": ["Group1.ARG1", "Group1.THR2", "Group2.ALA3", "Group2.CYS4"]}
>>> ],
>>> "tracks": [
>>> {"trackName": "DefaultTrack", "trackProperties": [
>>> {"nodeName": "ARG1", "size": 1.0, "color": "red"},
>>> {"nodeName": "THR2", "size": 1.0, "color": "red"},
>>> {"nodeName": "ALA3", "size": 1.0, "color": "blue"},
>>> {"nodeName": "CYS4", "size": 1.0, "color": "blue"}
>>> ]}
>>> ]
>>> }
"""
ret = {
"edges": []
}
# print(resi_labels)
# Strip atom3, atom4, and atom names
# unique_chains = set([c[2][0] for c in contacts] + [c[3][0] for c in contacts])
contacts = [(c[0], c[1], c[2][0:3], c[3][0:3]) for c in contacts]
resi_edges = {}
for contact in contacts:
# Compose a key for atom1 and atom2 that ignores the order of residues
a1_key = ":".join(contact[2][0:3])
a2_key = ":".join(contact[3][0:3])
if a1_key == a2_key:
continue
if a1_key > a2_key:
a1_key, a2_key = a2_key, a1_key
contact_key = a1_key + a2_key
# Look up labels
if resi_labels:
if a1_key not in resi_labels or a2_key not in resi_labels:
print("Omitting contact "+str(contact)+" as it doesn't appear in flare-label file")
continue
a1_label = resi_labels[a1_key]["label"]
a2_label = resi_labels[a2_key]["label"]
else:
a1_label = a1_key
a2_label = a2_key
# Create contact_key if it doesn't exist
if contact_key not in resi_edges:
edge = {"name1": a1_label, "name2": a2_label, "frames": []}
resi_edges[contact_key] = edge
ret["edges"].append(edge)
resi_edges[contact_key]["frames"].append(int(contact[0]))
# Sort edge frames and ensure that there are no duplicates
for e in ret["edges"]:
e["frames"] = sorted(set(e["frames"]))
# Create "trees" and "tracks" sections if resi_labels specified
if resi_labels is not None:
tree = {"treeLabel": "DefaultTree", "treePaths": []}
ret["trees"] = [tree]
track = {"trackLabel": "DefaultTrack", "trackProperties": []}
ret["tracks"] = [track]
for rlabel in resi_labels.values():
tree["treePaths"].append(rlabel["treepath"])
track["trackProperties"].append({
"nodeName": rlabel["label"],
"color": rlabel["color"],
"size": 1.0
})
return ret
if __name__ == "__main__":
main()
__license__ = "Apache License 2.0"
__maintainer__ = "Rasmus Fonseca"
__email__ = "[email protected]"