From 29565c42b5d0b276515f090422cdb5dd0580b275 Mon Sep 17 00:00:00 2001 From: Patrick Kunzmann Date: Sun, 2 Feb 2025 11:15:24 +0100 Subject: [PATCH] Use canonical rdkit import --- doc/tutorial/interface/rdkit.rst | 20 ++++------- src/biotite/interface/rdkit/mol.py | 55 +++++++++++++++--------------- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/doc/tutorial/interface/rdkit.rst b/doc/tutorial/interface/rdkit.rst index ff9954a52..e91ee55d2 100644 --- a/doc/tutorial/interface/rdkit.rst +++ b/doc/tutorial/interface/rdkit.rst @@ -27,15 +27,14 @@ For a proper structural formula, we need to compute proper 2D coordinates first. import biotite.interface.rdkit as rdkit_interface import biotite.structure.info as struc + import rdkit.Chem.AllChem as Chem from rdkit.Chem.Draw import MolToImage - from rdkit.Chem.rdDepictor import Compute2DCoords - from rdkit.Chem.rdmolops import RemoveHs penicillin = struc.residue("PNN") mol = rdkit_interface.to_mol(penicillin) # We do not want to include explicit hydrogen atoms in the structural formula - mol = RemoveHs(mol) - Compute2DCoords(mol) + mol = Chem.RemoveHs(mol) + Chem.Compute2DCoords(mol) image = MolToImage(mol, size=(600, 400)) display(image) @@ -49,18 +48,13 @@ One way to to obtain them as :class:`.AtomArray` is passing a *SMILES* string to .. jupyter-execute:: - from rdkit.Chem import MolFromSmiles - from rdkit.Chem.rdDistGeom import EmbedMolecule - from rdkit.Chem.rdForceFieldHelpers import UFFOptimizeMolecule - from rdkit.Chem.rdmolops import AddHs - ERTAPENEM_SMILES = "C[C@@H]1[C@@H]2[C@H](C(=O)N2C(=C1S[C@H]3C[C@H](NC3)C(=O)NC4=CC=CC(=C4)C(=O)O)C(=O)O)[C@@H](C)O" - mol = MolFromSmiles(ERTAPENEM_SMILES) + mol = Chem.MolFromSmiles(ERTAPENEM_SMILES) # RDKit uses implicit hydrogen atoms by default, but Biotite requires explicit ones - mol = AddHs(mol) + mol = Chem.AddHs(mol) # Create a 3D conformer - conformer_id = EmbedMolecule(mol) - UFFOptimizeMolecule(mol) + conformer_id = Chem.EmbedMolecule(mol) + Chem.UFFOptimizeMolecule(mol) ertapenem = rdkit_interface.from_mol(mol, conformer_id) print(ertapenem) \ No newline at end of file diff --git a/src/biotite/interface/rdkit/mol.py b/src/biotite/interface/rdkit/mol.py index 8001fa17f..c93ae1e74 100644 --- a/src/biotite/interface/rdkit/mol.py +++ b/src/biotite/interface/rdkit/mol.py @@ -9,9 +9,8 @@ import warnings from collections import defaultdict import numpy as np -from rdkit.Chem.rdchem import Atom, Conformer, EditableMol, KekulizeException, Mol -from rdkit.Chem.rdchem import BondType as RDKitBondType -from rdkit.Chem.rdmolops import AddHs, Kekulize, SanitizeFlags, SanitizeMol +import rdkit.Chem.AllChem as Chem +from rdkit.Chem import SanitizeFlags from biotite.interface.version import requires_version from biotite.interface.warning import LossyConversionWarning from biotite.structure.atoms import AtomArray, AtomArrayStack @@ -24,26 +23,26 @@ BondType.TRIPLE: BondType.AROMATIC_TRIPLE, } _BIOTITE_TO_RDKIT_BOND_TYPE = { - BondType.ANY: RDKitBondType.UNSPECIFIED, - BondType.SINGLE: RDKitBondType.SINGLE, - BondType.DOUBLE: RDKitBondType.DOUBLE, - BondType.TRIPLE: RDKitBondType.TRIPLE, - BondType.QUADRUPLE: RDKitBondType.QUADRUPLE, - BondType.AROMATIC_SINGLE: RDKitBondType.AROMATIC, - BondType.AROMATIC_DOUBLE: RDKitBondType.AROMATIC, - BondType.AROMATIC_TRIPLE: RDKitBondType.AROMATIC, - BondType.AROMATIC: RDKitBondType.AROMATIC, + BondType.ANY: Chem.BondType.UNSPECIFIED, + BondType.SINGLE: Chem.BondType.SINGLE, + BondType.DOUBLE: Chem.BondType.DOUBLE, + BondType.TRIPLE: Chem.BondType.TRIPLE, + BondType.QUADRUPLE: Chem.BondType.QUADRUPLE, + BondType.AROMATIC_SINGLE: Chem.BondType.AROMATIC, + BondType.AROMATIC_DOUBLE: Chem.BondType.AROMATIC, + BondType.AROMATIC_TRIPLE: Chem.BondType.AROMATIC, + BondType.AROMATIC: Chem.BondType.AROMATIC, # Dative bonds may lead to a KekulizeException and may potentially be deprecated # in the future (https://github.com/rdkit/rdkit/discussions/6995) - BondType.COORDINATION: RDKitBondType.SINGLE, + BondType.COORDINATION: Chem.BondType.SINGLE, } _RDKIT_TO_BIOTITE_BOND_TYPE = { - RDKitBondType.UNSPECIFIED: BondType.ANY, - RDKitBondType.SINGLE: BondType.SINGLE, - RDKitBondType.DOUBLE: BondType.DOUBLE, - RDKitBondType.TRIPLE: BondType.TRIPLE, - RDKitBondType.QUADRUPLE: BondType.QUADRUPLE, - RDKitBondType.DATIVE: BondType.COORDINATION, + Chem.BondType.UNSPECIFIED: BondType.ANY, + Chem.BondType.SINGLE: BondType.SINGLE, + Chem.BondType.DOUBLE: BondType.DOUBLE, + Chem.BondType.TRIPLE: BondType.TRIPLE, + Chem.BondType.QUADRUPLE: BondType.QUADRUPLE, + Chem.BondType.DATIVE: BondType.COORDINATION, } @@ -128,11 +127,11 @@ def to_mol( else: atoms = atoms[..., ~hydrogen_mask] - mol = EditableMol(Mol()) + mol = Chem.EditableMol(Chem.Mol()) has_charge_annot = "charge" in atoms.get_annotation_categories() for i in range(atoms.array_length()): - rdkit_atom = Atom(atoms.element[i].capitalize()) + rdkit_atom = Chem.Atom(atoms.element[i].capitalize()) if has_charge_annot: rdkit_atom.SetFormalCharge(atoms.charge[i].item()) if explicit_hydrogen: @@ -162,7 +161,7 @@ def to_mol( # Handle AtomArray and AtomArrayStack consistently coord = coord[None, :, :] for model_coord in coord: - conformer = Conformer(mol.GetNumAtoms()) + conformer = Chem.Conformer(mol.GetNumAtoms()) # RDKit silently expects the data to be in C-contiguous order # Otherwise the coordinates would be completely misassigned # (https://github.com/rdkit/rdkit/issues/8221) @@ -239,8 +238,8 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): if add_hydrogen is None: add_hydrogen = not _has_explicit_hydrogen(mol) if add_hydrogen: - SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS) - mol = AddHs(mol) + Chem.SanitizeMol(mol, SanitizeFlags.SANITIZE_ADJUSTHS) + mol = Chem.AddHs(mol) rdkit_atoms = mol.GetAtoms() if rdkit_atoms is None: @@ -274,15 +273,15 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None): rdkit_bonds = list(mol.GetBonds()) is_aromatic = np.array( - [bond.GetBondType() == RDKitBondType.AROMATIC for bond in rdkit_bonds] + [bond.GetBondType() == Chem.BondType.AROMATIC for bond in rdkit_bonds] ) if np.any(is_aromatic): # Determine the kekulized order of aromatic bonds # Copy as 'Kekulize()' modifies the molecule in-place - mol = Mol(mol) + mol = Chem.Mol(mol) try: - Kekulize(mol) - except KekulizeException: + Chem.Kekulize(mol) + except Chem.KekulizeException: warnings.warn( "Kekulization failed, " "using 'BondType.AROMATIC' instead for aromatic bonds instead",