Skip to content

Commit

Permalink
Add explicit_hydrogen parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Feb 7, 2025
1 parent 4c7204a commit f386ccb
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 10 deletions.
27 changes: 23 additions & 4 deletions src/biotite/interface/rdkit/mol.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def to_mol(
kekulize=False,
use_dative_bonds=False,
include_extra_annotations=(),
explicit_hydrogen=True,
):
"""
Convert an :class:`.AtomArray` or :class:`.AtomArrayStack` into a
Expand Down Expand Up @@ -105,6 +106,11 @@ def to_mol(
are always included per default. These standard annotations can be accessed
with :meth:`rdkit.Chem.rdchem.Atom.GetPDBResidueInfo()` for each atom in the
returned :class:`rdkit.Chem.rdchem.Mol`.
explicit_hydrogen : bool, optional
If set to true, the conversion process expects that all hydrogen atoms are
explicit, i.e. each each hydrogen atom must be part of the :class:`AtomArray`.
If set to false, the conversion process treats all hydrogen atoms as implicit
and all hydrogen atoms in the :class:`AtomArray` are removed.
Returns
-------
Expand Down Expand Up @@ -141,12 +147,24 @@ def to_mol(
HB3
HXT
"""
hydrogen_mask = atoms.element == "H"
if explicit_hydrogen:
if not hydrogen_mask.any():
warnings.warn(
"No hydrogen found in the input, although 'explicit_hydrogen' is 'True'"
)
else:
atoms = atoms[..., ~hydrogen_mask]

mol = EditableMol(Mol())

has_annot = frozenset(atoms.get_annotation_categories())
extra_annot = set(include_extra_annotations) - _STANDARD_ANNOTATIONS

for i in range(atoms.array_length()):
rdkit_atom = Atom(atoms.element[i].capitalize())
if explicit_hydrogen:
rdkit_atom.SetNoImplicit(True)
if "charge" in has_annot:
rdkit_atom.SetFormalCharge(atoms.charge[i].item())

Expand Down Expand Up @@ -176,11 +194,12 @@ def to_mol(

if atoms.bonds is None:
raise BadStructureError("An AtomArray with associated BondList is required")
bonds = atoms.bonds.as_array()
if kekulize:
bonds = bonds.copy()
bonds = atoms.bonds.copy()
bonds.remove_aromaticity()
for atom_i, atom_j, bond_type in atoms.bonds.as_array():
else:
bonds = atoms.bonds
for atom_i, atom_j, bond_type in bonds.as_array():
if not use_dative_bonds and bond_type == BondType.COORDINATION:
bond_type = BondType.SINGLE
mol.AddBond(
Expand Down Expand Up @@ -367,7 +386,7 @@ def from_mol(mol, conformer_id=None, add_hydrogen=None):
except KekulizeException:
warnings.warn(
"Kekulization failed, "
"using 'BondType.ANY' instead for aromatic bonds instead",
"using 'BondType.AROMATIC' instead for aromatic bonds instead",
LossyConversionWarning,
)
rdkit_bonds = list(mol.GetBonds())
Expand Down
10 changes: 4 additions & 6 deletions tests/interface/test_rdkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,9 @@ def _load_smiles():
return file.read().splitlines()


@pytest.mark.filterwarnings(
"ignore:"
"The coordinates are missing for some atoms. "
"The fallback coordinates will be used instead"
)
@pytest.mark.filterwarnings("ignore:Missing coordinates.*")
@pytest.mark.filterwarnings("ignore:.*coordinates are missing.*")
@pytest.mark.filterwarnings("ignore::biotite.interface.LossyConversionWarning")
@pytest.mark.parametrize(
"res_name", np.random.default_rng(0).choice(info.all_residues(), size=200).tolist()
)
Expand All @@ -46,7 +44,7 @@ def test_conversion_from_biotite(res_name):
# Some compounds in the CCD have missing coordinates
assert np.allclose(test_atoms.coord, ref_atoms.coord, equal_nan=True)

# There should be now undefined bonds
# There should be no undefined bonds
assert (test_atoms.bonds.as_array()[:, 2] != struc.BondType.ANY).all()
# Kekulization returns one of multiple resonance structures, so the returned one
# might not be the same as the input
Expand Down

0 comments on commit f386ccb

Please sign in to comment.