Skip to content

Commit

Permalink
Improve synonym and synonym typedef handling (#248)
Browse files Browse the repository at this point in the history
1. Switch storage in the Synonym class to keep only a Reference to the
synonym type definition, rather than the full SynonymTypeDef object
2. Emit a warning when a synonym is written out as OBO lines but its
synonym type definition is not defined in the ontology
  • Loading branch information
cthoyt authored Nov 25, 2024
1 parent ae40806 commit f917529
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 35 deletions.
10 changes: 5 additions & 5 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,23 +449,23 @@ def _extract_synonym(
rest = rest[len(_specificity) :].strip()
break

stype: SynonymTypeDef | None = None
stype: Reference | None = None
for _stype in synonym_typedefs.values():
# Since there aren't a lot of carefully defined synonym definitions, it
# can appear as a string or curie. Therefore, we might see temporary prefixes
# get added, so we should check against full curies as well as local unique
# identifiers
if rest.startswith(_stype.curie):
rest = rest[len(_stype.curie) :].strip()
stype = _stype
stype = _stype.reference
break
elif rest.startswith(_stype.preferred_curie):
rest = rest[len(_stype.preferred_curie) :].strip()
stype = _stype
stype = _stype.reference
break
elif rest.startswith(_stype.identifier):
rest = rest[len(_stype.identifier) :].strip()
stype = _stype
stype = _stype.reference
break

if not rest.startswith("[") or not rest.endswith("]"):
Expand All @@ -477,7 +477,7 @@ def _extract_synonym(
return Synonym(
name=name,
specificity=specificity or "EXACT",
type=stype or DEFAULT_SYNONYM_TYPE,
type=stype or DEFAULT_SYNONYM_TYPE.reference,
provenance=provenance,
)

Expand Down
75 changes: 63 additions & 12 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ class Synonym:
specificity: SynonymSpecificity = "EXACT"

#: The type of synonym. Must be defined in OBO document!
type: SynonymTypeDef = field(
default_factory=lambda: DEFAULT_SYNONYM_TYPE # type:ignore
type: Reference = field(
default_factory=lambda: DEFAULT_SYNONYM_TYPE.reference # type:ignore
)

#: References to articles where the synonym appears
Expand All @@ -117,14 +117,21 @@ def __lt__(self, other: Synonym) -> bool:
"""Sort lexically by name."""
return self._sort_key() < other._sort_key()

def _sort_key(self) -> tuple[str, str, SynonymTypeDef]:
def _sort_key(self) -> tuple[str, str, Reference]:
return self.name, self.specificity, self.type

def to_obo(self, ontology_prefix: str) -> str:
def to_obo(
self, ontology_prefix: str, synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef]
) -> str:
"""Write this synonym as an OBO line to appear in a [Term] stanza."""
return f"synonym: {self._fp(ontology_prefix)}"

def _fp(self, ontology_prefix: str) -> str:
return f"synonym: {self._fp(ontology_prefix, synonym_typedefs)}"

def _fp(
self, ontology_prefix: str, synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef]
) -> str:
_synonym_typedef_warn(ontology_prefix, self.type, synonym_typedefs)
# TODO inherit specificity from typedef?
# TODO validation of specificity against typedef
x = f'"{self._escape(self.name)}" {self.specificity}'
if self.type and self.type.pair != DEFAULT_SYNONYM_TYPE.pair:
x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
Expand Down Expand Up @@ -303,15 +310,19 @@ def append_synonym(
self,
synonym: str | Synonym,
*,
type: SynonymTypeDef | None = None,
type: Reference | Referenced | None = None,
specificity: SynonymSpecificity | None = None,
provenance: list[Reference] | None = None,
) -> None:
"""Add a synonym."""
if type is None:
type = DEFAULT_SYNONYM_TYPE.reference
elif isinstance(type, Referenced):
type = type.reference
if isinstance(synonym, str):
synonym = Synonym(
synonym,
type=type or DEFAULT_SYNONYM_TYPE,
type=type,
specificity=specificity or "EXACT",
provenance=provenance or [],
)
Expand Down Expand Up @@ -508,6 +519,7 @@ def iterate_obo_lines(
*,
ontology_prefix: str,
typedefs: Mapping[ReferenceTuple, TypeDef],
synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] | None = None,
emit_object_properties: bool = True,
emit_annotation_properties: bool = True,
) -> Iterable[str]:
Expand Down Expand Up @@ -546,8 +558,10 @@ def iterate_obo_lines(
for line in self._emit_properties(ontology_prefix, typedefs):
yield f"property_value: {line}"

if synonym_typedefs is None:
synonym_typedefs = {}
for synonym in sorted(self.synonyms):
yield synonym.to_obo(ontology_prefix=ontology_prefix)
yield synonym.to_obo(ontology_prefix=ontology_prefix, synonym_typedefs=synonym_typedefs)

def _emit_relations(
self, ontology_prefix: str, typedefs: Mapping[ReferenceTuple, TypeDef]
Expand Down Expand Up @@ -617,14 +631,41 @@ def _typedef_warn(
# Throw our hands up in the air. By using `obo` as the prefix,
# we already threw using "real" definitions out the window
logger.warning(
f"[{prefix}] predicate with obo prefix not defined: {predicate.curie}."
f"[{prefix}] predicate with OBO prefix not defined: {predicate.curie}."
f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
f"which automatically gets an OBO prefix."
)
else:
logger.warning(f"[{prefix}] typedef not defined: {predicate.curie}")


#: A set of warnings, used to make sure we don't show the same one over and over
_SYNONYM_TYPEDEF_WARNINGS: set[tuple[str, Reference]] = set()


def _synonym_typedef_warn(
    prefix: str, predicate: Reference, synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef]
) -> bool:
    """Warn, once per ontology and synonym type, about an undefined synonym typedef.

    :param prefix: The ontology prefix. It tags the log message and, together with
        the synonym type, forms the key used to avoid repeating a warning.
    :param predicate: The reference to the synonym type that is being written.
    :param synonym_typedefs: The synonym type definitions to look the type up in,
        indexed by reference pair.
    :returns: ``False`` if the synonym type is the default synonym type or is found
        in one of the lookups; ``True`` if it is undefined, regardless of whether a
        new warning was logged by this call.
    """
    pair = predicate.pair
    # NOTE(review): the middle lookup consults ``default_typedefs`` rather than
    # ``default_synonym_typedefs`` -- confirm that this is intended.
    is_known = (
        pair == DEFAULT_SYNONYM_TYPE.pair or pair in default_typedefs or pair in synonym_typedefs
    )
    if is_known:
        return False

    # Only log the first time this (ontology, synonym type) combination is seen
    warning_key = (prefix, predicate)
    if warning_key in _SYNONYM_TYPEDEF_WARNINGS:
        return True
    _SYNONYM_TYPEDEF_WARNINGS.add(warning_key)

    if predicate.prefix != "obo":
        logger.warning(f"[{prefix}] synonym typedef not defined: {predicate.preferred_curie}")
        return True

    # An ``obo`` prefix gets its own, more detailed message that explains the
    # likely cause (an unqualified identifier in the source OBO file)
    logger.warning(
        f"[{prefix}] synonym typedef with OBO prefix not defined: {predicate.preferred_curie}."
        f"\n\tThis might be because you used an unqualified prefix in an OBO file, "
        f"which automatically gets an OBO prefix."
    )
    return True


class BioregistryError(ValueError):
"""An error raised for non-canonical prefixes."""

Expand Down Expand Up @@ -864,10 +905,12 @@ def iterate_obo_lines(
yield from typedef.iterate_obo_lines(ontology_prefix=self.ontology)

typedefs = self._index_typedefs()
synonym_typedefs = self._index_synonym_typedefs()
for term in self:
yield from term.iterate_obo_lines(
ontology_prefix=self.ontology,
typedefs=typedefs,
synonym_typedefs=synonym_typedefs,
emit_object_properties=emit_object_properties,
emit_annotation_properties=emit_annotation_properties,
)
Expand All @@ -878,6 +921,12 @@ def _index_typedefs(self) -> Mapping[ReferenceTuple, TypeDef]:
default_typedefs,
)

def _index_synonym_typedefs(self) -> Mapping[ReferenceTuple, SynonymTypeDef]:
    """Index this ontology's synonym typedefs by pair, falling back to the defaults.

    :returns: A layered mapping in which the synonym typedefs declared on this
        ontology are consulted first and ``default_synonym_typedefs`` second.
    """
    declared = self.synonym_typedefs or []
    local_index = {synonym_typedef.pair: synonym_typedef for synonym_typedef in declared}
    return ChainMap(local_index, default_synonym_typedefs)

def write_obo(
self,
file: None | str | TextIO | Path = None,
Expand Down Expand Up @@ -1154,6 +1203,7 @@ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
#: a list of 3-tuples u,v,k
links = []
typedefs = self._index_typedefs()
synonym_typedefs = self._index_synonym_typedefs()
for term in self._iter_terms(use_tqdm=use_tqdm):
parents = []
for parent in term.parents:
Expand All @@ -1179,7 +1229,8 @@ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
"is_a": parents,
"relationship": relations,
"synonym": [
synonym._fp(ontology_prefix=self.ontology) for synonym in term.synonyms
synonym._fp(ontology_prefix=self.ontology, synonym_typedefs=synonym_typedefs)
for synonym in term.synonyms
],
"property_value": list(term._emit_properties(self.ontology, typedefs)),
}
Expand Down
12 changes: 6 additions & 6 deletions tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_extract_synonym(self):
Synonym(
name="LTEC I",
specificity="EXACT",
type=iupac_name,
type=iupac_name.reference,
provenance=[Reference(prefix="orphanet", identifier="93938")],
),
'"LTEC I" EXACT IUPAC_NAME [Orphanet:93938]',
Expand All @@ -144,19 +144,19 @@ def test_extract_synonym(self):
'"LTEC I" []',
),
(
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
Synonym(name="HAdV-A", specificity="BROAD", type=acronym.reference),
'"HAdV-A" BROAD OMO:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
Synonym(name="HAdV-A", specificity="BROAD", type=acronym.reference),
'"HAdV-A" BROAD omo:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
Synonym(name="HAdV-A", specificity="EXACT", type=acronym.reference),
'"HAdV-A" OMO:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
Synonym(name="HAdV-A", specificity="EXACT", type=acronym.reference),
'"HAdV-A" omo:0003012 []',
),
]:
Expand Down Expand Up @@ -193,7 +193,7 @@ def test_get_node_synonyms(self):
"N,N,N-tributylbutan-1-aminium fluoride", synonym.name, msg="name parsing failed"
)
self.assertEqual("EXACT", synonym.specificity, msg="specificity parsing failed")
self.assertEqual(iupac_name, synonym.type)
self.assertEqual(iupac_name.reference, synonym.type)

def test_get_node_properties(self):
"""Test getting properties from a node in a :mod:`obonet` graph."""
Expand Down
18 changes: 9 additions & 9 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def test_synonym_minimal(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("EXACT", synonym.specificity)
self.assertEqual(DEFAULT_SYNONYM_TYPE, synonym.type)
self.assertEqual(DEFAULT_SYNONYM_TYPE.reference, synonym.type)
self.assertEqual([], synonym.provenance)

def test_synonym_with_specificity(self) -> None:
Expand All @@ -585,7 +585,7 @@ def test_synonym_with_specificity(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("NARROW", synonym.specificity)
self.assertEqual(DEFAULT_SYNONYM_TYPE, synonym.type)
self.assertEqual(DEFAULT_SYNONYM_TYPE.reference, synonym.type)
self.assertEqual([], synonym.provenance)

def test_synonym_with_type_missing_def(self) -> None:
Expand All @@ -601,7 +601,7 @@ def test_synonym_with_type_missing_def(self) -> None:
self.assertEqual(1, len(term.synonyms))
synonym = term.synonyms[0]
# this is because no typedef existed
self.assertEqual(DEFAULT_SYNONYM_TYPE, synonym.type)
self.assertEqual(DEFAULT_SYNONYM_TYPE.reference, synonym.type)

def test_synonym_with_type(self) -> None:
"""Test parsing a synonym with type."""
Expand All @@ -618,7 +618,7 @@ def test_synonym_with_type(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("EXACT", synonym.specificity)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type.reference)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type)
self.assertEqual([], synonym.provenance)

def test_synonym_with_type_and_specificity(self) -> None:
Expand All @@ -636,7 +636,7 @@ def test_synonym_with_type_and_specificity(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("NARROW", synonym.specificity)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type.reference)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type)
self.assertEqual([], synonym.provenance)

def test_synonym_with_empty_prov(self) -> None:
Expand All @@ -654,7 +654,7 @@ def test_synonym_with_empty_prov(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("NARROW", synonym.specificity)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type.reference)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type)
self.assertEqual([], synonym.provenance)

def test_synonym_no_type(self) -> None:
Expand All @@ -671,7 +671,7 @@ def test_synonym_no_type(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("EXACT", synonym.specificity)
self.assertEqual(DEFAULT_SYNONYM_TYPE, synonym.type)
self.assertEqual(DEFAULT_SYNONYM_TYPE.reference, synonym.type)
self.assertEqual(
[
Reference(prefix="orphanet", identifier="93938"),
Expand All @@ -695,7 +695,7 @@ def test_synonym_full(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("EXACT", synonym.specificity)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type.reference)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type)
self.assertEqual(
[
Reference(prefix="orphanet", identifier="93938"),
Expand All @@ -719,7 +719,7 @@ def test_synonym_url(self) -> None:
synonym = term.synonyms[0]
self.assertEqual("LTEC I", synonym.name)
self.assertEqual("EXACT", synonym.specificity)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type.reference)
self.assertEqual(Reference(prefix="omo", identifier="1234567"), synonym.type)
self.assertEqual(
[
Reference(prefix="orphanet", identifier="93938"),
Expand Down
31 changes: 28 additions & 3 deletions tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,11 @@ def test_append_synonym(self) -> None:
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

omo_dummy = SynonymTypeDef(reference=Reference(prefix="OMO", identifier="1234567"))
term = Term(LYSINE_DEHYDROGENASE_ACT)
term.append_synonym(
"L-lysine:NAD+ oxidoreductase",
# TODO switch to bare reference
type=SynonymTypeDef(reference=Reference(prefix="OMO", identifier="1234567")),
type=omo_dummy,
provenance=[Reference(prefix="orcid", identifier="0000-0003-4423-4370")],
)
self.assert_lines(
Expand All @@ -388,7 +388,32 @@ def test_append_synonym(self) -> None:
name: lysine dehydrogenase activity
synonym: "L-lysine:NAD+ oxidoreductase" EXACT OMO:1234567 [orcid:0000-0003-4423-4370]
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
term.iterate_obo_lines(
ontology_prefix="go",
typedefs={RO_DUMMY.pair: RO_DUMMY},
synonym_typedefs={omo_dummy.pair: omo_dummy},
),
)

def test_append_synonym_missing_typedef(self) -> None:
"""Test that writing a synonym whose type has no synonym typedef logs a warning."""
term = Term(LYSINE_DEHYDROGENASE_ACT)
# The synonym type is given as a bare reference; no SynonymTypeDef is created for it
term.append_synonym(
"L-lysine:NAD+ oxidoreductase",
type=Reference(prefix="OMO", identifier="1234567"),
)
# assertLogs captures records at INFO and above, which includes the expected WARNING
with self.assertLogs(level="INFO") as log:
self.assert_lines(
"""\
[Term]
id: GO:0050069
name: lysine dehydrogenase activity
synonym: "L-lysine:NAD+ oxidoreductase" EXACT OMO:1234567 []
""",
# No ``synonym_typedefs`` are passed here, so OMO:1234567 is undefined when the lines are generated
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)
# The synonym line is still written (asserted above); the warning is emitted in addition
self.assertIn(
"WARNING:pyobo.struct.struct:[go] synonym typedef not defined: OMO:1234567", log.output
)

def test_definition(self):
Expand Down

0 comments on commit f917529

Please sign in to comment.