-
Notifications
You must be signed in to change notification settings - Fork 1
/
lsalign_tools.py
164 lines (138 loc) · 4.5 KB
/
lsalign_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import re
import numpy as np
def get_translation_matrix(matrix_path):
'''
Returns translation matrix from file generated by LSalign
Parameters
----------
matrix_path : str
Path to LSalign matrix output
Returns
-------
M : ndarray
Translation matrix
'''
with open(matrix_path, 'r') as file:
next(file)
matrix_raw = np.array([next(file).split() for _ in range(3)])[:, 1:]
M = matrix_raw.astype(np.float64)
return M
def apply_translation(coord, M):
'''
Applies translation specified by the matrix to coordinate vector
Parameters
----------
coord : ndarray
Coordinates vector
M : ndarray
Translation matrix
Returns
-------
vector : ndarray
New coordinates vector
'''
vector = np.ones(4)
vector[1:] = coord
vector = M.dot(vector)
return vector
def apply_translation_explicitly(coord, M):
'''
Does the same thing as 'apply_translation', but exactly as
described in LSalign output matrix file
Parameters
----------
coord : ndarray
Coordinates vector
M : ndarray
Translation matrix
Returns
-------
vector : ndarray
New coordinates vector
'''
x, y, z = coord
vector = np.zeros(3)
vector[0] = M[0,0] + x*M[0,1] + y*M[0,2] + z*M[0,3]
vector[1] = M[1,0] + x*M[1,1] + y*M[1,2] + z*M[1,3]
vector[2] = M[2,0] + x*M[2,1] + y*M[2,2] + z*M[2,3]
return vector
def translate_pdb_structure(pdb_path, rotated_pdb_path, translation_matrix):
'''
Applies translation specified by the matrix to all atoms in pdb file
and writes result to new pdb file
Parameters
----------
pdb_path : str
Path to ligand pdb file
rotated_pdb_path : str
Path to output pdb file where to write rotated structure
translation_matrix : ndarray
Translation matrix
'''
pdb_atom_pattern = re.compile('((ATOM|HETATM)\s+)(\d+)(\s+)([A-Z]+)(.+\n)')
pdb_content = []
with open(pdb_path, 'r') as pdb:
for line in pdb:
match = pdb_atom_pattern.match(line)
if match:
coords = np.array(line.split()[5:8]).astype(np.float32)
coords = apply_translation(coords, translation_matrix)
coords_s = '{:>8.3f}{:>8.3f}{:>8.3f}'.format(*coords)
line = line[:30] + coords_s + line[54:]
pdb_content.append(line)
with open(rotated_pdb_path, 'w') as pdb:
pdb.writelines(pdb_content)
def get_alignment_report(alignment_report_path):
'''
Returns alignment report dict from LSalign console output
Parameters
----------
alignment_report_path : str
Path to LSalign console output file
Returns
-------
alignment_report : dict
Alignment information and metrics
'''
with open(alignment_report_path, 'r') as alignment_report:
next(alignment_report)
header = next(alignment_report).split()
next(alignment_report)
values = next(alignment_report).split()
values[2:8] = map(float, values[2:8])
values[8:] = map(int, values[8:])
alignment_report = dict(zip(header, values))
return alignment_report
def extract_aligned_ligand(reference_pdb, alignment_pdb, output_pdb):
'''
Extracts aligned query ligand from pdb file generated by LSalign,
restores atom names by comparing with the original ligand pdb file
and writes result to new pdb file
Parameters
----------
reference_pdb : str
Path to ligand pdb file
alignment_pdb : str
Path to pdb alignment file generated by LSalign
output_pdb : str
Path to output pdb file where to write extracted structure
'''
pdb_atom_pattern = re.compile('((ATOM|HETATM)\s+)(\d+)(\s+)([A-Z]+)(.+\n)')
atom_lines = {}
with open(reference_pdb, 'r') as pdb:
for line in pdb:
match = pdb_atom_pattern.match(line)
if match:
atom = match.groups()[2]
atom_lines[atom] = line
pdb_content = []
with open(alignment_pdb, 'r') as pdb:
for line in pdb:
if line.startswith('TER'):
break
match = pdb_atom_pattern.match(line)
atom = match.groups()[2]
line = atom_lines[atom][:30] + line[30:]
pdb_content.append(line)
with open(output_pdb, 'w') as pdb:
pdb.writelines(pdb_content)