lsalign_tools.py

import re
import numpy as np


def get_translation_matrix(matrix_path):
    '''
    Returns translation matrix from file generated by LSalign

    Parameters
    ----------
    matrix_path : str
        Path to LSalign matrix output

    Returns
    -------
    M : ndarray
        Translation matrix
    '''
    with open(matrix_path, 'r') as file:
        next(file)
        matrix_raw = np.array([next(file).split() for _ in range(3)])[:, 1:]
        M = matrix_raw.astype(np.float64)
    return M

def apply_translation(coord, M):
    '''
    Applies translation specified by the matrix to coordinate vector

    Parameters
    ----------
    coord : ndarray
        Coordinates vector
    M : ndarray
        Translation matrix

    Returns
    -------
    vector : ndarray
        New coordinates vector
    '''
    vector = np.ones(4)
    vector[1:] = coord
    vector = M.dot(vector)
    return vector

def apply_translation_explicitly(coord, M):
    '''
    Does the same thing as 'apply_translation', but exactly as 
    described in LSalign output matrix file

    Parameters
    ----------
    coord : ndarray
        Coordinates vector
    M : ndarray
        Translation matrix

    Returns
    -------
    vector : ndarray
        New coordinates vector
    '''
    x, y, z = coord
    vector = np.zeros(3)
    vector[0] = M[0,0] + x*M[0,1] + y*M[0,2] + z*M[0,3]
    vector[1] = M[1,0] + x*M[1,1] + y*M[1,2] + z*M[1,3]
    vector[2] = M[2,0] + x*M[2,1] + y*M[2,2] + z*M[2,3]
    
    return vector

def translate_pdb_structure(pdb_path, rotated_pdb_path, translation_matrix):
    '''
    Applies translation specified by the matrix to all atoms in pdb file
    and writes result to new pdb file

    Parameters
    ----------
    pdb_path : str
        Path to ligand pdb file
    rotated_pdb_path : str
        Path to output pdb file where to write rotated structure
    translation_matrix : ndarray
        Translation matrix
    '''
    pdb_atom_pattern = re.compile('((ATOM|HETATM)\s+)(\d+)(\s+)([A-Z]+)(.+\n)')
                
    pdb_content = []
    with open(pdb_path, 'r') as pdb:
        for line in pdb:
            match = pdb_atom_pattern.match(line)
            if match:
                coords = np.array(line.split()[5:8]).astype(np.float32)
                coords = apply_translation(coords, translation_matrix)
                coords_s = '{:>8.3f}{:>8.3f}{:>8.3f}'.format(*coords)
                line = line[:30] + coords_s + line[54:]
            pdb_content.append(line)
            
    with open(rotated_pdb_path, 'w') as pdb:
        pdb.writelines(pdb_content)

def get_alignment_report(alignment_report_path):
    '''
    Returns alignment report dict from LSalign console output

    Parameters
    ----------
    alignment_report_path : str
        Path to LSalign console output file

    Returns
    -------
    alignment_report : dict
        Alignment information and metrics
    '''
    with open(alignment_report_path, 'r') as alignment_report:
        next(alignment_report)
        header = next(alignment_report).split()
        next(alignment_report)
        values = next(alignment_report).split()
        
    values[2:8] = map(float, values[2:8])
    values[8:] = map(int, values[8:])
    alignment_report = dict(zip(header, values))
    
    return alignment_report

def extract_aligned_ligand(reference_pdb, alignment_pdb, output_pdb):
    '''
    Extracts aligned query ligand from pdb file generated by LSalign,
    restores atom names by comparing with the original ligand pdb file
    and writes result to new pdb file

    Parameters
    ----------
    reference_pdb : str
        Path to ligand pdb file
    alignment_pdb : str
        Path to pdb alignment file generated by LSalign
    output_pdb : str
        Path to output pdb file where to write extracted structure
    ''' 
    pdb_atom_pattern = re.compile('((ATOM|HETATM)\s+)(\d+)(\s+)([A-Z]+)(.+\n)')
                
    atom_lines = {}
    with open(reference_pdb, 'r') as pdb:
        for line in pdb:
            match = pdb_atom_pattern.match(line)
            if match:
                atom = match.groups()[2]
                atom_lines[atom] = line
    
    pdb_content = []
    with open(alignment_pdb, 'r') as pdb:
        for line in pdb:
            if line.startswith('TER'):
                break
            match = pdb_atom_pattern.match(line)
            atom = match.groups()[2]
            line = atom_lines[atom][:30] + line[30:]
            pdb_content.append(line)
            
    with open(output_pdb, 'w') as pdb:
        pdb.writelines(pdb_content)