From 89df26c20c30ddbbf015b035a0199788afd80b83 Mon Sep 17 00:00:00 2001 From: Alain Vaucher Date: Thu, 19 Sep 2024 22:12:22 +0200 Subject: [PATCH] Support for atom placeholders (as asterisks) in reaction SMILES (#57) --- rxnmapper/smiles_utils.py | 2 +- tests/test_mapper.py | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/rxnmapper/smiles_utils.py b/rxnmapper/smiles_utils.py index 9bd41ab..1b3ae10 100644 --- a/rxnmapper/smiles_utils.py +++ b/rxnmapper/smiles_utils.py @@ -89,7 +89,7 @@ def is_atom(token: str, special_tokens: List[str] = BAD_TOKS) -> bool: bool: True if atom, False if not """ bad_toks = set(special_tokens) - normal_atom = token[0].isalpha() or token[0] == "[" + normal_atom = token[0].isalpha() or token[0] == "[" or "*" in token is_bad = token in bad_toks return (not is_bad) and normal_atom diff --git a/tests/test_mapper.py b/tests/test_mapper.py index b09d374..86789af 100644 --- a/tests/test_mapper.py +++ b/tests/test_mapper.py @@ -121,3 +121,19 @@ def test_reaction_with_dative_bond(rxn_mapper: RXNMapper): results = rxn_mapper.get_attention_guided_atom_maps(rxns, canonicalize_rxns=False) assert_correct_maps(results, expected) + + +def test_reaction_with_asterisks(rxn_mapper: RXNMapper): + # Some reaction SMILES contain asterisks as atom placeholders, + # especially if some of the asterisks were inside brackets. + rxns = ["[1*]C=C.O>>*CCO"] + + expected = [ + { + "mapped_rxn": "[1*:1][CH:2]=[CH2:3].[OH2:4]>>[*:1][CH2:2][CH2:3][OH:4]", + "confidence": 0.9988284870307568, + } + ] + + results = rxn_mapper.get_attention_guided_atom_maps(rxns, canonicalize_rxns=False) + assert_correct_maps(results, expected)