Skip to content

Commit

Permalink
Use canonical rdkit import
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Feb 7, 2025
1 parent f386ccb commit 167c31a
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 50 deletions.
20 changes: 7 additions & 13 deletions doc/tutorial/interface/rdkit.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,14 @@ For a proper structural formula, we need to compute proper 2D coordinates first.

import biotite.interface.rdkit as rdkit_interface
import biotite.structure.info as struc
import rdkit.Chem.AllChem as Chem
from rdkit.Chem.Draw import MolToImage
from rdkit.Chem.rdDepictor import Compute2DCoords
from rdkit.Chem.rdmolops import RemoveHs

penicillin = struc.residue("PNN")
mol = rdkit_interface.to_mol(penicillin)
# We do not want to include explicit hydrogen atoms in the structural formula
mol = RemoveHs(mol)
Compute2DCoords(mol)
mol = Chem.RemoveHs(mol)
Chem.Compute2DCoords(mol)
image = MolToImage(mol, size=(600, 400))
display(image)

Expand All @@ -49,18 +48,13 @@ One way to to obtain them as :class:`.AtomArray` is passing a *SMILES* string to

.. jupyter-execute::

from rdkit.Chem import MolFromSmiles
from rdkit.Chem.rdDistGeom import EmbedMolecule
from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule
from rdkit.Chem.rdmolops import AddHs

ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O"

mol = MolFromSmiles(ERTAPENEM_SMILES)
mol = Chem.MolFromSmiles(ERTAPENEM_SMILES)
# RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones
mol = AddHs(mol)
mol = Chem.AddHs(mol)
# Create a 3D conformer
conformer_id = EmbedMolecule(mol)
UFFOptimizeMolecule(mol)
conformer_id = Chem.EmbedMolecule(mol)
Chem.UFFOptimizeMolecule(mol)
ertapenem = rdkit_interface.from_mol(mol, conformer_id)
print(ertapenem)
66 changes: 29 additions & 37 deletions src/biotite/interface/rdkit/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,8 @@
import warnings
from collections import defaultdict
import numpy as np
from rdkit.Chem.rdchem import (
Atom,
AtomPDBResidueInfo,
Conformer,
EditableMol,
KekulizeException,
Mol,
)
from rdkit.Chem.rdchem import BondType as RDKitBondType
from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol
import rdkit.Chem.AllChem as Chem
from rdkit.Chem import SanitizeFlags
from biotite.interface.version import requires_version
from biotite.interface.warning import LossyConversionWarning
from biotite.structure.atoms import AtomArray, AtomArrayStack
Expand All @@ -31,26 +23,26 @@
BondType.TRIPLE: BondType.AROMATIC_TRIPLE,
}
_BIOTITE_TO_RDKIT_BOND_TYPE = {
BondType.ANY: RDKitBondType.UNSPECIFIED,
BondType.SINGLE: RDKitBondType.SINGLE,
BondType.DOUBLE: RDKitBondType.DOUBLE,
BondType.TRIPLE: RDKitBondType.TRIPLE,
BondType.QUADRUPLE: RDKitBondType.QUADRUPLE,
BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC,
BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC,
BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC,
BondType.AROMATIC: RDKitBondType.AROMATIC,
BondType.ANY: Chem.BondType.UNSPECIFIED,
BondType.SINGLE: Chem.BondType.SINGLE,
BondType.DOUBLE: Chem.BondType.DOUBLE,
BondType.TRIPLE: Chem.BondType.TRIPLE,
BondType.QUADRUPLE: Chem.BondType.QUADRUPLE,
BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC,
BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC,
BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC,
BondType.AROMATIC: Chem.BondType.AROMATIC,
# Dative bonds may lead to a KekulizeException and may potentially be deprecated
# in the future (https://github.com/rdkit/rdkit/discussions/6995)
BondType.COORDINATION: RDKitBondType.SINGLE,
BondType.COORDINATION: Chem.BondType.SINGLE,
}
_RDKIT_TO_BIOTITE_BOND_TYPE = {
RDKitBondType.UNSPECIFIED: BondType.ANY,
RDKitBondType.SINGLE: BondType.SINGLE,
RDKitBondType.DOUBLE: BondType.DOUBLE,
RDKitBondType.TRIPLE: BondType.TRIPLE,
RDKitBondType.QUADRUPLE: BondType.QUADRUPLE,
RDKitBondType.DATIVE: BondType.COORDINATION,
Chem.BondType.UNSPECIFIED: BondType.ANY,
Chem.BondType.SINGLE: BondType.SINGLE,
Chem.BondType.DOUBLE: BondType.DOUBLE,
Chem.BondType.TRIPLE: BondType.TRIPLE,
Chem.BondType.QUADRUPLE: BondType.QUADRUPLE,
Chem.BondType.DATIVE: BondType.COORDINATION,
}
_STANDARD_ANNOTATIONS = frozenset(
{
Expand Down Expand Up @@ -156,20 +148,20 @@ def to_mol(
else:
atoms = atoms[..., ~hydrogen_mask]

mol = EditableMol(Mol())
mol = Chem.EditableMol(Chem.Mol())

has_annot = frozenset(atoms.get_annotation_categories())
extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS

for i in range(atoms.array_length()):
rdkit_atom = Atom(atoms.element[i].capitalize())
rdkit_atom = Chem.Atom(atoms.element[i].capitalize())
if explicit_hydrogen:
rdkit_atom.SetNoImplicit(True)
if "charge" in has_annot:
rdkit_atom.SetFormalCharge(atoms.charge[i].item())

# add standard pdb annotations
rdkit_atom_res_info = AtomPDBResidueInfo(
rdkit_atom_res_info = Chem.AtomPDBResidueInfo(
atomName=atoms.atom_name[i].item(),
residueName=atoms.res_name[i].item(),
chainId=atoms.chain_id[i].item(),
Expand Down Expand Up @@ -213,7 +205,7 @@ def to_mol(
# Handle AtomArray and AtomArrayStack consistently
coord = coord[None, :, :]
for model_coord in coord:
conformer = Conformer(mol.GetNumAtoms())
conformer = Chem.Conformer(mol.GetNumAtoms())
# RDKit silently expects the data to be in C-contiguous order
# Otherwise the coordinates would be completely misassigned
# (https://github.com/rdkit/rdkit/issues/8221)
Expand Down Expand Up @@ -290,8 +282,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
if add_hydrogen is None:
add_hydrogen = not _has_explicit_hydrogen(mol)
if add_hydrogen:
SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
mol = AddHs(mol, addCoords=False, addResidueInfo=False)
Chem.SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS)
mol = Chem.AddHs(mol, addCoords=False, addResidueInfo=False)

rdkit_atoms = mol.GetAtoms()
if rdkit_atoms is None:
Expand Down Expand Up @@ -328,7 +320,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
residue_info = rdkit_atom.GetPDBResidueInfo()
if residue_info is None:
# ... default values for atoms with missing residue information
residue_info = AtomPDBResidueInfo(
residue_info = Chem.AtomPDBResidueInfo(
atomName="",
occupancy=0.0,
tempFactor=float("nan"),
Expand Down Expand Up @@ -375,15 +367,15 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):

rdkit_bonds = list(mol.GetBonds())
is_aromatic = np.array(
[bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds]
[bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds]
)
if np.any(is_aromatic):
# Determine the kekulized order of aromatic bonds
# Copy as 'Kekulize()' modifies the molecule in-place
mol = Mol(mol)
mol = Chem.Mol(mol)
try:
Kekulize(mol)
except KekulizeException:
Chem.Kekulize(mol)
except Chem.KekulizeException:
warnings.warn(
"Kekulization failed, "
"using 'BondType.AROMATIC' instead for aromatic bonds instead",
Expand Down

0 comments on commit 167c31a

Please sign in to comment.