From 4872b423c4a49536a2710fb483db8076158b103a Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 18:27:51 +0100 Subject: [PATCH 01/10] Handle default relations --- src/pyobo/reader.py | 12 ++++++++++++ tests/test_get.py | 38 ++++++++------------------------------ tests/test_reader.py | 24 +++++++++++++++++++++--- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 38371cee..f875aacb 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Any +import bioontologies.relations import bioregistry import networkx as nx from curies import ReferenceTuple @@ -390,6 +391,8 @@ def iterate_graph_typedefs( reference = Reference.from_curie_or_uri( curie, name=name, strict=strict, ontology_prefix=ontology_prefix ) + elif reference := _ground_relation(curie): + pass else: reference = default_reference(ontology_prefix, curie, name=name) if reference is None: @@ -700,6 +703,8 @@ def iterate_node_relationships( relation = Reference.from_curie_or_uri( relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node ) + elif relation := _ground_relation(relation_curie): + pass else: relation = default_reference(ontology_prefix, relation_curie) logger.debug( @@ -750,3 +755,10 @@ def iterate_node_xrefs( ) if yv is not None: yield yv + + +def _ground_relation(relation_str: str) -> Reference | None: + prefix, identifier = bioontologies.relations.ground_relation(relation_str) + if prefix and identifier: + return Reference(prefix=prefix, identifier=identifier) + return None diff --git a/tests/test_get.py b/tests/test_get.py index be04c5fd..cc84ea10 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -6,7 +6,7 @@ import obonet from curies import ReferenceTuple -from pyobo import Reference, Synonym, SynonymTypeDef, TypeDef, default_reference, get_ontology +from pyobo import Reference, Synonym, SynonymTypeDef, 
default_reference, get_ontology from pyobo.reader import ( _extract_definition, _extract_synonym, @@ -19,7 +19,6 @@ iterate_node_xrefs, ) from pyobo.struct.struct import acronym -from pyobo.utils.io import multidict from tests.constants import TEST_CHEBI_OBO_PATH, chebi_patch @@ -37,7 +36,7 @@ def test_get_graph_typedefs(self): pairs = { typedef.pair for typedef in iterate_graph_typedefs(self.graph, ontology_prefix="chebi") } - self.assertIn(ReferenceTuple("obo", "chebi#has_part"), pairs) + self.assertIn(ReferenceTuple("obo", "chebi#has_major_microspecies_at_pH_7_3"), pairs) def test_get_graph_synonym_typedefs(self): """Test getting synonym type definitions from an :mod:`obonet` graph.""" @@ -251,7 +250,7 @@ def test_get_node_relations(self): data = self.graph.nodes["CHEBI:17051"] relations = list( iterate_node_relationships( - data, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi" + data, node=Reference(prefix="chebi", identifier="17051"), ontology_prefix="chebi" ) ) self.assertEqual(1, len(relations)) @@ -259,13 +258,11 @@ def test_get_node_relations(self): self.assertIsNotNone(target) self.assertIsInstance(target, Reference) - self.assertEqual("chebi", target.prefix) - self.assertEqual("29228", target.identifier) + self.assertEqual(("chebi", "29228"), target.pair) self.assertIsNotNone(typedef) self.assertIsInstance(typedef, Reference) - self.assertEqual("obo", typedef.prefix) - self.assertEqual("chebi#is_conjugate_base_of", typedef.identifier) + self.assertEqual(("ro", "0018033"), typedef.pair) class TestGet(unittest.TestCase): @@ -299,26 +296,7 @@ def test_get_id_alts_mapping(self): def test_typedefs(self): """Test typedefs.""" - xx = default_reference("chebi", "is_conjugate_base_of") - td = {t.pair for t in self.ontology.typedefs} + xx = default_reference("chebi", "has_major_microspecies_at_pH_7_3") + td = self.ontology._index_typedefs() self.assertIn(xx.pair, td) - - def test_iter_filtered_relations(self): - """Test getting filtered 
relations w/ upgrade.""" - term_reference = Reference(prefix="chebi", identifier="17051") - reference = default_reference("chebi", "is_conjugate_base_of") - object_reference = Reference(prefix="chebi", identifier="29228") - for inp in [ - reference.curie, - reference, - reference.pair, - TypeDef(reference=reference), - ]: - with self.subTest(inp=inp): - rr = multidict( - (term.reference, target) - for term, target in self.ontology.iterate_filtered_relations(inp) - ) - self.assertNotEqual(0, len(rr)) - self.assertIn(term_reference, rr) - self.assertIn(object_reference, rr[term_reference]) + self.assertIn(ReferenceTuple("ro", "0018033"), td) diff --git a/tests/test_reader.py b/tests/test_reader.py index ffa5a120..acc3935e 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -162,19 +162,25 @@ def test_relationship_unqualified(self) -> None: [Term] id: CHEBI:1234 name: Test Name - relationship: is_conjugate_base_of CHEBI:5678 + relationship: xyz CHEBI:5678 [Typedef] - id: is_conjugate_base_of + id: xyz """) term = self.get_only_term(ontology) self.assertIsNone(term.get_relationship(is_conjugate_base_of)) - r = default_reference("chebi", "is_conjugate_base_of") + r = default_reference("chebi", "xyz") td = TypeDef(reference=r) reference = term.get_relationship(td) self.assertIsNotNone(reference) self.assertEqual("chebi:5678", reference.curie) + rr = list(ontology.iterate_filtered_relations(td)) + self.assertEqual(1, len(rr)) + + rr2 = list(ontology.iterate_filtered_relations(is_conjugate_base_of)) + self.assertEqual(0, len(rr2)) + def test_relationship_missing(self) -> None: """Test parsing a relationship that isn't defined.""" ontology = _read("""\ @@ -923,6 +929,18 @@ def test_mappings(self) -> None: {(a.pair, b.pair) for a, b in term.get_mappings(include_xrefs=True)}, ) + def test_default_relation(self): + """Test parsing an unprefixed relation that has no typedef.""" + ontology = _read("""\ + ontology: chebi + + [Term] + id: CHEBI:100147 + relationship: derives_from 
drugbank:DB00779 + """) + term = self.get_only_term(ontology) + self.assertEqual(1, len(term.relationships)) + class TestVersionHandling(unittest.TestCase): """Test version handling.""" From 315d9b1798eeca7ed899a0eebcc79a1dc6da1bf2 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 18:39:47 +0100 Subject: [PATCH 02/10] Refactor --- src/pyobo/reader.py | 45 +++++++---------------------------- src/pyobo/struct/reference.py | 28 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index f875aacb..249206c2 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -9,7 +9,6 @@ from pathlib import Path from typing import Any -import bioontologies.relations import bioregistry import networkx as nx from curies import ReferenceTuple @@ -387,14 +386,9 @@ def iterate_graph_typedefs( if name is None: logger.debug("[%s] typedef %s is missing a name", graph.graph["ontology"], curie) - if ":" in curie: - reference = Reference.from_curie_or_uri( - curie, name=name, strict=strict, ontology_prefix=ontology_prefix - ) - elif reference := _ground_relation(curie): - pass - else: - reference = default_reference(ontology_prefix, curie, name=name) + reference = Reference.from_curie_uri_or_default( + curie, strict=strict, ontology_prefix=ontology_prefix, name=name + ) if reference is None: logger.warning("[%s] unable to parse typedef CURIE %s", graph.graph["ontology"], curie) continue @@ -650,12 +644,9 @@ def _get_prop( if prop.startswith(sw): identifier = prop.removeprefix(sw) return default_reference(ontology_prefix, identifier) - if ":" not in prop: - return default_reference(ontology_prefix, prop) - else: - return Reference.from_curie_or_uri( - prop, strict=strict, node=node, ontology_prefix=ontology_prefix - ) + return Reference.from_curie_uri_or_default( + prop, strict=strict, node=node, ontology_prefix=ontology_prefix + ) def iterate_node_parents( @@ -698,20 
+689,9 @@ def iterate_node_relationships( """Extract relationships from a :mod:`obonet` node's data.""" for s in data.get("relationship", []): relation_curie, target_curie = s.split(" ") - - if ":" in relation_curie: - relation = Reference.from_curie_or_uri( - relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node - ) - elif relation := _ground_relation(relation_curie): - pass - else: - relation = default_reference(ontology_prefix, relation_curie) - logger.debug( - "unhandled relation: %s. Parsing as default relation: %s", - relation_curie, - relation.curie, - ) + relation = Reference.from_curie_uri_or_default( + relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node + ) if relation is None: logger.warning("[%s] could not parse relation %s", node.curie, relation_curie) continue @@ -755,10 +735,3 @@ def iterate_node_xrefs( ) if yv is not None: yield yv - - -def _ground_relation(relation_str: str) -> Reference | None: - prefix, identifier = bioontologies.relations.ground_relation(relation_str) - if prefix and identifier: - return Reference(prefix=prefix, identifier=identifier) - return None diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index 6ab451a8..6510fe7b 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -4,6 +4,7 @@ from typing import Any +import bioontologies.relations import bioregistry import curies from curies import ReferenceTuple @@ -102,6 +103,26 @@ def from_curie_or_uri( name = get_name(prefix, identifier) return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name}) + @classmethod + def from_curie_uri_or_default( + cls, + s: str, + *, + ontology_prefix: str, + strict: bool = True, + node: Reference | None = None, + name: str | None = None, + ) -> Reference | None: + """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace.""" + if ":" in s: + return cls.from_curie_or_uri( + s, ontology_prefix=ontology_prefix, 
name=name, strict=strict, node=node + ) + elif reference := _ground_relation(s): + return reference + else: + return default_reference(ontology_prefix, s, name=name) + @property def _escaped_identifier(self): return obo_escape(self.identifier) @@ -194,3 +215,10 @@ def reference_escape(predicate: Reference | Referenced, *, ontology_prefix: str) def comma_separate_references(references: list[Reference]) -> str: """Map a list to strings and make comma separated.""" return ", ".join(r.preferred_curie for r in references) + + +def _ground_relation(relation_str: str) -> Reference | None: + prefix, identifier = bioontologies.relations.ground_relation(relation_str) + if prefix and identifier: + return Reference(prefix=prefix, identifier=identifier) + return None From df9bddd392744d9d0d1ab7ab86fc92d499946c92 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 18:43:59 +0100 Subject: [PATCH 03/10] Update test_reader.py --- tests/test_reader.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index acc3935e..83d2e01f 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -251,18 +251,23 @@ def test_property_literal_typed(self) -> None: [Term] id: CHEBI:1234 - property_value: mass "121.323" xsd:decimal + property_value: xyz "121.323" xsd:decimal + + [Typedef] + id: xyz """) term = self.get_only_term(ontology) self.assertEqual(1, len(list(term.annotations_literal))) - self.assertEqual("121.323", term.get_property(default_reference("chebi", "mass"))) + ref = default_reference("chebi", "xyz") + self.assertIn(ref, term.annotations_literal) + self.assertEqual("121.323", term.get_property(ref)) df = ontology.get_properties_df() self.assertEqual(4, len(df.columns)) self.assertEqual(1, len(df)) row = dict(df.iloc[0]) self.assertEqual("1234", row["chebi_id"]) - self.assertEqual("mass", row["property"]) + self.assertEqual("xyz", row["property"]) 
self.assertEqual("121.323", row["value"]) self.assertEqual("xsd:decimal", row["datatype"]) From 2cef76e67b66de43eb2cfd031015fd0eefd0f6d6 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 18:47:28 +0100 Subject: [PATCH 04/10] Minor renames --- src/pyobo/reader.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 249206c2..daf44c95 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -376,27 +376,30 @@ def iterate_graph_typedefs( """Get type definitions from an :mod:`obonet` graph.""" for typedef in graph.graph.get("typedefs", []): if "id" in typedef: - curie = typedef["id"] + typedef_id = typedef["id"] elif "identifier" in typedef: - curie = typedef["identifier"] + typedef_id = typedef["identifier"] else: raise KeyError("typedef is missing an `id`") name = typedef.get("name") if name is None: - logger.debug("[%s] typedef %s is missing a name", graph.graph["ontology"], curie) + logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id) reference = Reference.from_curie_uri_or_default( - curie, strict=strict, ontology_prefix=ontology_prefix, name=name + typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name ) if reference is None: - logger.warning("[%s] unable to parse typedef CURIE %s", graph.graph["ontology"], curie) + logger.warning("[%s] unable to parse typedef ID %s", ontology_prefix, typedef_id) continue xrefs = [] - for curie in typedef.get("xref", []): + for xref_curie in typedef.get("xref", []): _xref = Reference.from_curie_or_uri( - curie, strict=strict, ontology_prefix=ontology_prefix + xref_curie, + strict=strict, + ontology_prefix=ontology_prefix, + node=reference, ) if _xref: xrefs.append(_xref) @@ -511,6 +514,10 @@ def _extract_synonym( ) +#: A counter for errors in parsing provenance +PROVENANCE_COUNTER: Counter[str] = Counter() + + def _parse_trailing_ref_list( rest: 
str, *, strict: bool = True, node: Reference, ontology_prefix: str | None ) -> list[Reference]: @@ -524,7 +531,9 @@ def _parse_trailing_ref_list( curie, strict=strict, node=node, ontology_prefix=ontology_prefix ) if reference is None: - logger.warning("[%s] could not parse provenance CURIE: %s", node.curie, curie) + if not PROVENANCE_COUNTER[curie]: + logger.warning("[%s] could not parse provenance CURIE: %s", node.curie, curie) + PROVENANCE_COUNTER[curie] += 1 continue rv.append(reference) return rv @@ -637,15 +646,15 @@ def _handle_prop( def _get_prop( - prop: str, *, node: Reference, strict: bool, ontology_prefix: str + property_id: str, *, node: Reference, strict: bool, ontology_prefix: str ) -> Reference | None: for delim in "#/": sw = f"http://purl.obolibrary.org/obo/{ontology_prefix}{delim}" - if prop.startswith(sw): - identifier = prop.removeprefix(sw) + if property_id.startswith(sw): + identifier = property_id.removeprefix(sw) return default_reference(ontology_prefix, identifier) return Reference.from_curie_uri_or_default( - prop, strict=strict, node=node, ontology_prefix=ontology_prefix + property_id, strict=strict, node=node, ontology_prefix=ontology_prefix ) From 2ab69ad09020fbc6a564a656f233c59a5d6b996a Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 18:50:37 +0100 Subject: [PATCH 05/10] Change parsing function --- src/pyobo/reader.py | 9 ++++---- src/pyobo/struct/reference.py | 39 +++++++++++++++++------------------ 2 files changed, 23 insertions(+), 25 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index daf44c95..2d57004c 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -33,6 +33,7 @@ default_reference, make_ad_hoc_ontology, ) +from .struct.reference import _parse_identifier from .struct.struct import DEFAULT_SYNONYM_TYPE, LiteralProperty, ObjectProperty from .struct.typedef import default_typedefs from .utils.misc import STATIC_VERSION_REWRITES, cleanup_version @@ -386,7 
+387,7 @@ def iterate_graph_typedefs( if name is None: logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id) - reference = Reference.from_curie_uri_or_default( + reference = _parse_identifier( typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name ) if reference is None: @@ -653,9 +654,7 @@ def _get_prop( if property_id.startswith(sw): identifier = property_id.removeprefix(sw) return default_reference(ontology_prefix, identifier) - return Reference.from_curie_uri_or_default( - property_id, strict=strict, node=node, ontology_prefix=ontology_prefix - ) + return _parse_identifier(property_id, strict=strict, node=node, ontology_prefix=ontology_prefix) def iterate_node_parents( @@ -698,7 +697,7 @@ def iterate_node_relationships( """Extract relationships from a :mod:`obonet` node's data.""" for s in data.get("relationship", []): relation_curie, target_curie = s.split(" ") - relation = Reference.from_curie_uri_or_default( + relation = _parse_identifier( relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node ) if relation is None: diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index 6510fe7b..b56e43b4 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -103,26 +103,6 @@ def from_curie_or_uri( name = get_name(prefix, identifier) return cls.model_validate({"prefix": prefix, "identifier": identifier, "name": name}) - @classmethod - def from_curie_uri_or_default( - cls, - s: str, - *, - ontology_prefix: str, - strict: bool = True, - node: Reference | None = None, - name: str | None = None, - ) -> Reference | None: - """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace.""" - if ":" in s: - return cls.from_curie_or_uri( - s, ontology_prefix=ontology_prefix, name=name, strict=strict, node=node - ) - elif reference := _ground_relation(s): - return reference - else: - return default_reference(ontology_prefix, s, name=name) - @property def 
_escaped_identifier(self): return obo_escape(self.identifier) @@ -222,3 +202,22 @@ def _ground_relation(relation_str: str) -> Reference | None: if prefix and identifier: return Reference(prefix=prefix, identifier=identifier) return None + + +def _parse_identifier( + s: str, + *, + ontology_prefix: str, + strict: bool = True, + node: Reference | None = None, + name: str | None = None, +) -> Reference | None: + """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace.""" + if ":" in s: + return Reference.from_curie_or_uri( + s, ontology_prefix=ontology_prefix, name=name, strict=strict, node=node + ) + elif reference := _ground_relation(s): + return reference + else: + return default_reference(ontology_prefix, s, name=name) From 959ab5c11874628706342512986808f97a975e04 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 19:38:01 +0100 Subject: [PATCH 06/10] Add flag for making upgrading opt-in --- src/pyobo/reader.py | 108 ++++++++++++++++++++++++---------- src/pyobo/reader_utils.py | 22 ++----- src/pyobo/struct/reference.py | 3 +- tests/test_get.py | 23 ++++++-- 4 files changed, 102 insertions(+), 54 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 2d57004c..460acefb 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -52,6 +52,7 @@ def from_obo_path( *, strict: bool = True, version: str | None, + upgrade: bool = False, ) -> Obo: """Get the OBO graph from a path.""" path = Path(path).expanduser().resolve() @@ -80,7 +81,7 @@ def from_obo_path( _clean_graph_ontology(graph, prefix) # Convert to an Obo instance and return - return from_obonet(graph, strict=strict, version=version) + return from_obonet(graph, strict=strict, version=version, upgrade=upgrade) def _read_obo(filelike, prefix: str | None) -> nx.MultiDiGraph: @@ -99,7 +100,13 @@ def _read_obo(filelike, prefix: str | None) -> nx.MultiDiGraph: ) -def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: 
str | None = None) -> Obo: +def from_obonet( + graph: nx.MultiDiGraph, + *, + strict: bool = True, + version: str | None = None, + upgrade: bool = False, +) -> Obo: """Get all of the terms from a OBO graph.""" ontology_prefix_raw = graph.graph["ontology"] ontology_prefix = bioregistry.normalize_prefix(ontology_prefix_raw) # probably always okay @@ -121,13 +128,18 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: str | N #: CURIEs to typedefs typedefs: Mapping[ReferenceTuple, TypeDef] = { typedef.pair: typedef - for typedef in iterate_graph_typedefs(graph, ontology_prefix=ontology_prefix) + for typedef in iterate_graph_typedefs( + graph, ontology_prefix=ontology_prefix, strict=strict, upgrade=upgrade + ) } synonym_typedefs: Mapping[ReferenceTuple, SynonymTypeDef] = { synonym_typedef.pair: synonym_typedef for synonym_typedef in iterate_graph_synonym_typedefs( - graph, ontology_prefix=ontology_prefix + graph, + ontology_prefix=ontology_prefix, + strict=strict, + upgrade=upgrade, ) } @@ -184,6 +196,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: str | N node=reference, strict=strict, ontology_prefix=ontology_prefix, + upgrade=upgrade, ) ) n_synonyms += len(synonyms) @@ -204,6 +217,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: str | N node=reference, strict=strict, ontology_prefix=ontology_prefix, + upgrade=upgrade, ) ) for relation, reference in relations_references: @@ -221,7 +235,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: str | N term.append_relationship(typedef, reference) for t in iterate_node_properties( - data, node=reference, strict=strict, ontology_prefix=ontology_prefix + data, node=reference, strict=strict, ontology_prefix=ontology_prefix, upgrade=upgrade ): n_properties += 1 match t: @@ -348,31 +362,30 @@ def _get_name(graph, ontology_prefix: str) -> str: def iterate_graph_synonym_typedefs( - graph: nx.MultiDiGraph, *, ontology_prefix: 
str, strict: bool = False + graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = False, upgrade: bool ) -> Iterable[SynonymTypeDef]: """Get synonym type definitions from an :mod:`obonet` graph.""" - for s in graph.graph.get("synonymtypedef", []): - sid, name = s.split(" ", 1) + for line in graph.graph.get("synonymtypedef", []): + synonym_typedef_id, name = line.split(" ", 1) name = name.strip().strip('"') - if ":" not in sid: - # assume it's a default reference - yield SynonymTypeDef(reference=default_reference(ontology_prefix, sid, name=name)) - else: - reference = Reference.from_curie_or_uri( - sid, name=name, strict=strict, ontology_prefix=ontology_prefix + reference = _parse_identifier( + synonym_typedef_id, + ontology_prefix=ontology_prefix, + name=name, + upgrade=upgrade, + strict=strict, + ) + if reference is None: + logger.warning( + "[%s] unable to parse synonym typedef ID %s", ontology_prefix, synonym_typedef_id ) - if reference is not None: - yield SynonymTypeDef(reference=reference) - elif strict: - raise ValueError( - f"[{ontology_prefix}] could not parse synonym type definition: {sid}" - ) - else: - continue + continue + # TODO handle specificity + yield SynonymTypeDef(reference=reference) def iterate_graph_typedefs( - graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = True + graph: nx.MultiDiGraph, *, ontology_prefix: str, strict: bool = True, upgrade: bool ) -> Iterable[TypeDef]: """Get type definitions from an :mod:`obonet` graph.""" for typedef in graph.graph.get("typedefs", []): @@ -388,7 +401,7 @@ def iterate_graph_typedefs( logger.debug("[%s] typedef %s is missing a name", ontology_prefix, typedef_id) reference = _parse_identifier( - typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name + typedef_id, strict=strict, ontology_prefix=ontology_prefix, name=name, upgrade=upgrade ) if reference is None: logger.warning("[%s] unable to parse typedef ID %s", ontology_prefix, typedef_id) @@ -485,6 +498,7 @@ def 
_extract_synonym( node: Reference, strict: bool = True, ontology_prefix: str, + upgrade: bool, ) -> Synonym | None: # TODO check if the synonym is written like a CURIE... it shouldn't but I've seen it happen try: @@ -500,6 +514,7 @@ def _extract_synonym( strict=strict, node=node, ontology_prefix=ontology_prefix, + upgrade=upgrade, ) provenance, rest = _chomp_references( rest, strict=strict, node=node, ontology_prefix=ontology_prefix @@ -547,6 +562,7 @@ def iterate_node_synonyms( node: Reference, strict: bool = False, ontology_prefix: str, + upgrade: bool, ) -> Iterable[Synonym]: """Extract synonyms from a :mod:`obonet` node's data. @@ -558,7 +574,12 @@ def iterate_node_synonyms( """ for s in data.get("synonym", []): s = _extract_synonym( - s, synonym_typedefs, node=node, strict=strict, ontology_prefix=ontology_prefix + s, + synonym_typedefs, + node=node, + strict=strict, + ontology_prefix=ontology_prefix, + upgrade=upgrade, ) if s is not None: yield s @@ -571,12 +592,21 @@ def iterate_node_synonyms( def iterate_node_properties( - data: Mapping[str, Any], *, node: Reference, strict: bool = True, ontology_prefix: str + data: Mapping[str, Any], + *, + node: Reference, + strict: bool = True, + ontology_prefix: str, + upgrade: bool, ) -> Iterable[ObjectProperty | LiteralProperty]: """Extract properties from a :mod:`obonet` node's data.""" for prop_value_type in data.get("property_value", []): if yv := _handle_prop( - prop_value_type, node=node, strict=strict, ontology_prefix=ontology_prefix + prop_value_type, + node=node, + strict=strict, + ontology_prefix=ontology_prefix, + upgrade=upgrade, ): yield yv @@ -589,7 +619,12 @@ def iterate_node_properties( def _handle_prop( - prop_value_type: str, *, node: Reference, strict: bool = True, ontology_prefix: str + prop_value_type: str, + *, + node: Reference, + strict: bool = True, + ontology_prefix: str, + upgrade: bool, ) -> ObjectProperty | LiteralProperty | None: try: prop, value_type = prop_value_type.split(" ", 1) @@ 
-597,7 +632,9 @@ def _handle_prop( logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type) return None - prop_reference = _get_prop(prop, node=node, strict=strict, ontology_prefix=ontology_prefix) + prop_reference = _get_prop( + prop, node=node, strict=strict, ontology_prefix=ontology_prefix, upgrade=upgrade + ) if prop_reference is None: if not UNHANDLED_PROPS[prop]: logger.warning("[%s] unparsable property: %s", node.curie, prop) @@ -647,14 +684,16 @@ def _handle_prop( def _get_prop( - property_id: str, *, node: Reference, strict: bool, ontology_prefix: str + property_id: str, *, node: Reference, strict: bool, ontology_prefix: str, upgrade: bool ) -> Reference | None: for delim in "#/": sw = f"http://purl.obolibrary.org/obo/{ontology_prefix}{delim}" if property_id.startswith(sw): identifier = property_id.removeprefix(sw) return default_reference(ontology_prefix, identifier) - return _parse_identifier(property_id, strict=strict, node=node, ontology_prefix=ontology_prefix) + return _parse_identifier( + property_id, strict=strict, node=node, ontology_prefix=ontology_prefix, upgrade=upgrade + ) def iterate_node_parents( @@ -693,12 +732,17 @@ def iterate_node_relationships( node: Reference, strict: bool = True, ontology_prefix: str, + upgrade: bool, ) -> Iterable[tuple[Reference, Reference]]: """Extract relationships from a :mod:`obonet` node's data.""" for s in data.get("relationship", []): relation_curie, target_curie = s.split(" ") relation = _parse_identifier( - relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node + relation_curie, + strict=strict, + ontology_prefix=ontology_prefix, + node=node, + upgrade=upgrade, ) if relation is None: logger.warning("[%s] could not parse relation %s", node.curie, relation_curie) diff --git a/src/pyobo/reader_utils.py b/src/pyobo/reader_utils.py index b2021847..d01d65ff 100644 --- a/src/pyobo/reader_utils.py +++ b/src/pyobo/reader_utils.py @@ -6,11 +6,11 @@ from collections 
import Counter from collections.abc import Mapping -import bioontologies.upgrade from curies import ReferenceTuple from pyobo.struct import SynonymSpecificities, SynonymSpecificity -from pyobo.struct.struct import Reference, SynonymTypeDef, _synonym_typedef_warn, default_reference +from pyobo.struct.reference import _parse_identifier +from pyobo.struct.struct import Reference, SynonymTypeDef, _synonym_typedef_warn logger = logging.getLogger(__name__) @@ -32,6 +32,7 @@ def _chomp_typedef( strict: bool = True, node: Reference, ontology_prefix: str, + upgrade: bool, ) -> tuple[SynonymTypeDef | None, str]: if not s: # This might happen if a synonym is just given as a string @@ -56,20 +57,9 @@ def _chomp_typedef( stype_curie, rest = s, "" - reference: Reference | None - if ":" not in stype_curie: - # this catches situation where it's "ABBREVIATION" - if xx := bioontologies.upgrade.upgrade(stype_curie): - reference = Reference(prefix=xx.prefix, identifier=xx.identifier) - else: - reference = default_reference(ontology_prefix, stype_curie) - else: - reference = Reference.from_curie_or_uri( - stype_curie, - strict=strict, - node=node, - ontology_prefix=ontology_prefix, - ) + reference = _parse_identifier( + stype_curie, strict=strict, node=node, ontology_prefix=ontology_prefix, upgrade=upgrade + ) if reference is None: logger.warning( "[%s] unable to parse synonym type `%s` in line %s", node.curie, stype_curie, s diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index b56e43b4..8b376da1 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -211,13 +211,14 @@ def _parse_identifier( strict: bool = True, node: Reference | None = None, name: str | None = None, + upgrade: bool, ) -> Reference | None: """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace.""" if ":" in s: return Reference.from_curie_or_uri( s, ontology_prefix=ontology_prefix, name=name, strict=strict, node=node ) - elif reference := 
_ground_relation(s): + elif upgrade and (reference := _ground_relation(s)): return reference else: return default_reference(ontology_prefix, s, name=name) diff --git a/tests/test_get.py b/tests/test_get.py index cc84ea10..36bbc42a 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -34,14 +34,19 @@ def setUpClass(cls) -> None: def test_get_graph_typedefs(self): """Test getting type definitions from an :mod:`obonet` graph.""" pairs = { - typedef.pair for typedef in iterate_graph_typedefs(self.graph, ontology_prefix="chebi") + typedef.pair + for typedef in iterate_graph_typedefs( + self.graph, ontology_prefix="chebi", upgrade=False + ) } self.assertIn(ReferenceTuple("obo", "chebi#has_major_microspecies_at_pH_7_3"), pairs) def test_get_graph_synonym_typedefs(self): """Test getting synonym type definitions from an :mod:`obonet` graph.""" synonym_typedefs = sorted( - iterate_graph_synonym_typedefs(self.graph, ontology_prefix=self.ontology), + iterate_graph_synonym_typedefs( + self.graph, ontology_prefix=self.ontology, upgrade=False + ), key=attrgetter("curie"), ) self.assertEqual( @@ -165,6 +170,7 @@ def test_extract_synonym(self): synoynym_typedefs, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi", + upgrade=False, ) self.assertIsInstance(actual_synonym, Synonym) self.assertEqual(expected_synonym, actual_synonym) @@ -184,6 +190,7 @@ def test_get_node_synonyms(self): synoynym_typedefs, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi", + upgrade=False, ) ) self.assertEqual(1, len(synonyms)) @@ -199,7 +206,10 @@ def test_get_node_properties(self): data = self.graph.nodes["CHEBI:51990"] properties = list( iterate_node_properties( - data, node=Reference(prefix="chebi", identifier="51990"), ontology_prefix="chebi" + data, + node=Reference(prefix="chebi", identifier="51990"), + ontology_prefix="chebi", + upgrade=False, ) ) t_prop = default_reference("chebi", "monoisotopicmass") @@ -250,7 +260,10 @@ def 
test_get_node_relations(self): data = self.graph.nodes["CHEBI:17051"] relations = list( iterate_node_relationships( - data, node=Reference(prefix="chebi", identifier="17051"), ontology_prefix="chebi" + data, + node=Reference(prefix="chebi", identifier="17051"), + ontology_prefix="chebi", + upgrade=False, ) ) self.assertEqual(1, len(relations)) @@ -262,7 +275,7 @@ def test_get_node_relations(self): self.assertIsNotNone(typedef) self.assertIsInstance(typedef, Reference) - self.assertEqual(("ro", "0018033"), typedef.pair) + self.assertEqual(("chebi", "chebi#is_conjugate_base_of"), typedef.pair) class TestGet(unittest.TestCase): From e4d91d7bc857c481cd57450b574d9a1018f9a024 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 19:43:49 +0100 Subject: [PATCH 07/10] Update getters.py --- src/pyobo/getters.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py index 218e55ca..434c2d00 100644 --- a/src/pyobo/getters.py +++ b/src/pyobo/getters.py @@ -70,6 +70,7 @@ def get_ontology( strict: bool = True, version: str | None = None, robot_check: bool = True, + upgrade: bool = False, ) -> Obo: """Get the OBO for a given graph. @@ -81,6 +82,9 @@ def get_ontology( :param robot_check: If set to false, will send the ``--check=false`` command to ROBOT to disregard malformed ontology components. Necessary to load some ontologies like VO. + :param upgrade: + If set to true, will automatically upgrade relationships, such as + ``obo:chebi#has_part`` to ``BFO:0000051`` :returns: An OBO object :raises OnlyOWLError: If the OBO foundry only has an OWL document for this resource. 
@@ -132,7 +136,7 @@ def get_ontology( else: raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}") - obo = from_obo_path(path, prefix=prefix, strict=strict, version=version) + obo = from_obo_path(path, prefix=prefix, strict=strict, version=version, upgrade=upgrade) obo.write_default(force=force_process) return obo From d7086e7857fc7aeeed70879316e555a4c99ed872 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 19:48:57 +0100 Subject: [PATCH 08/10] Update test_get.py --- tests/test_get.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_get.py b/tests/test_get.py index 36bbc42a..a5f7b75f 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -275,7 +275,7 @@ def test_get_node_relations(self): self.assertIsNotNone(typedef) self.assertIsInstance(typedef, Reference) - self.assertEqual(("chebi", "chebi#is_conjugate_base_of"), typedef.pair) + self.assertEqual(("obo", "chebi#is_conjugate_base_of"), typedef.pair) class TestGet(unittest.TestCase): From c15ff87dcf78de0faadac6881b01debf1feec4d9 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 20:13:53 +0100 Subject: [PATCH 09/10] Make upgrading default behavior --- src/pyobo/getters.py | 2 +- src/pyobo/identifier_utils.py | 12 +++++++----- src/pyobo/reader.py | 4 ++-- src/pyobo/reader_utils.py | 12 ++++++++---- src/pyobo/struct/reference.py | 13 ++++++++----- tests/test_reader.py | 21 ++++++++++++++++----- 6 files changed, 42 insertions(+), 22 deletions(-) diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py index 434c2d00..a6b061ac 100644 --- a/src/pyobo/getters.py +++ b/src/pyobo/getters.py @@ -70,7 +70,7 @@ def get_ontology( strict: bool = True, version: str | None = None, robot_check: bool = True, - upgrade: bool = False, + upgrade: bool = True, ) -> Obo: """Get the OBO for a given graph. 
diff --git a/src/pyobo/identifier_utils.py b/src/pyobo/identifier_utils.py index 5cbc97e0..3149c7b3 100644 --- a/src/pyobo/identifier_utils.py +++ b/src/pyobo/identifier_utils.py @@ -75,6 +75,7 @@ def normalize_curie( strict: bool = True, ontology_prefix: str | None = None, node: Reference | None = None, + upgrade: bool = True, ) -> ReferenceTuple | tuple[None, None]: """Parse a string that looks like a CURIE. @@ -86,11 +87,12 @@ def normalize_curie( - Normalizes the namespace - Checks against a blacklist for the entire curie, for the namespace, and for suffixes. """ - # Remap the curie with the full list - curie = remap_full(curie) + if upgrade: + # Remap the curie with the full list + curie = remap_full(curie) - # Remap node's prefix (if necessary) - curie = remap_prefix(curie, ontology_prefix=ontology_prefix) + # Remap node's prefix (if necessary) + curie = remap_prefix(curie, ontology_prefix=ontology_prefix) if curie_is_blacklisted(curie): return None, None @@ -99,7 +101,7 @@ def normalize_curie( if curie_has_blacklisted_suffix(curie): return None, None - if reference_t := bioontologies.upgrade.upgrade(curie): + if upgrade and (reference_t := bioontologies.upgrade.upgrade(curie)): return reference_t if curie.startswith("http:") or curie.startswith("https:"): diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 460acefb..6fc21a8f 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -52,7 +52,7 @@ def from_obo_path( *, strict: bool = True, version: str | None, - upgrade: bool = False, + upgrade: bool = True, ) -> Obo: """Get the OBO graph from a path.""" path = Path(path).expanduser().resolve() @@ -105,7 +105,7 @@ def from_obonet( *, strict: bool = True, version: str | None = None, - upgrade: bool = False, + upgrade: bool = True, ) -> Obo: """Get all of the terms from a OBO graph.""" ontology_prefix_raw = graph.graph["ontology"] diff --git a/src/pyobo/reader_utils.py b/src/pyobo/reader_utils.py index d01d65ff..f117a38a 100644 --- 
a/src/pyobo/reader_utils.py +++ b/src/pyobo/reader_utils.py @@ -43,7 +43,7 @@ def _chomp_typedef( return None, s try: - stype_curie, rest = (x.strip() for x in s.split(" ", 1)) + synonym_typedef_id, rest = (x.strip() for x in s.split(" ", 1)) except ValueError as e: if "not enough values to unpack" not in str(e): raise @@ -55,14 +55,18 @@ def _chomp_typedef( # if there return None, s - stype_curie, rest = s, "" + synonym_typedef_id, rest = s, "" reference = _parse_identifier( - stype_curie, strict=strict, node=node, ontology_prefix=ontology_prefix, upgrade=upgrade + synonym_typedef_id, + strict=strict, + node=node, + ontology_prefix=ontology_prefix, + upgrade=upgrade, ) if reference is None: logger.warning( - "[%s] unable to parse synonym type `%s` in line %s", node.curie, stype_curie, s + "[%s] unable to parse synonym type `%s` in line %s", node.curie, synonym_typedef_id, s ) return None, rest diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index 8b376da1..33b377eb 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -5,6 +5,7 @@ from typing import Any import bioontologies.relations +import bioontologies.upgrade import bioregistry import curies from curies import ReferenceTuple @@ -211,14 +212,16 @@ def _parse_identifier( strict: bool = True, node: Reference | None = None, name: str | None = None, - upgrade: bool, + upgrade: bool = True, ) -> Reference | None: """Parse from a CURIE, URI, or default string in the ontology prefix's IDspace.""" if ":" in s: return Reference.from_curie_or_uri( s, ontology_prefix=ontology_prefix, name=name, strict=strict, node=node ) - elif upgrade and (reference := _ground_relation(s)): - return reference - else: - return default_reference(ontology_prefix, s, name=name) + if upgrade: + if xx := bioontologies.upgrade.upgrade(s): + return Reference(prefix=xx.prefix, identifier=xx.identifier, name=name) + if yy := _ground_relation(s): + return Reference(prefix=yy.prefix, 
identifier=yy.identifier, name=name) + return default_reference(ontology_prefix, s, name=name) diff --git a/tests/test_reader.py b/tests/test_reader.py index 83d2e01f..e8d21a57 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -17,13 +17,15 @@ CHARLIE = Reference(prefix="orcid", identifier="0000-0003-4423-4370") -def _read(text: str, *, strict: bool = True, version: str | None = None) -> Obo: +def _read( + text: str, *, strict: bool = True, version: str | None = None, upgrade: bool = True +) -> Obo: text = dedent(text).strip() io = StringIO() io.write(text) io.seek(0) graph = read_obo(io) - return from_obonet(graph, strict=strict, version=version) + return from_obonet(graph, strict=strict, version=version, upgrade=upgrade) class TestUtils(unittest.TestCase): @@ -832,20 +834,29 @@ def test_synonym_default(self) -> None: def test_synonym_builtin(self) -> None: """Test parsing a synonym with specificity, type, and provenance.""" - ontology = _read("""\ + text = """\ ontology: chebi [Term] id: CHEBI:1234 synonym: "COP" EXACT ABBREVIATION [] - """) + """ + + ontology = _read(text, upgrade=False) + term = self.get_only_term(ontology) + self.assertEqual(1, len(term.synonyms)) + synonym = term.synonyms[0] + self.assertEqual("COP", synonym.name) + self.assertEqual("EXACT", synonym.specificity) + self.assertEqual(DEFAULT_SYNONYM_TYPE.reference, synonym.type) + + ontology = _read(text, upgrade=True) term = self.get_only_term(ontology) self.assertEqual(1, len(term.synonyms)) synonym = term.synonyms[0] self.assertEqual("COP", synonym.name) self.assertEqual("EXACT", synonym.specificity) self.assertEqual(abbreviation.reference, synonym.type) - self.assertEqual(Reference(prefix="OMO", identifier="0003000"), synonym.type) @unittest.skip( reason="This needs to be fixed upstream, since obonet's " From 9473d74763d95d55202e53ca0c22ab7b075d7bf9 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt <cthoyt@gmail.com> Date: Tue, 3 Dec 2024 20:16:17 +0100 Subject: [PATCH 10/10] 
Update test_reader.py --- tests/test_reader.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_reader.py b/tests/test_reader.py index e8d21a57..6358a9a0 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -12,7 +12,14 @@ from pyobo.reader import from_obonet, get_first_nonescaped_quote from pyobo.struct import default_reference from pyobo.struct.struct import DEFAULT_SYNONYM_TYPE, abbreviation -from pyobo.struct.typedef import TypeDef, exact_match, has_dbxref, is_conjugate_base_of, see_also +from pyobo.struct.typedef import ( + TypeDef, + derives_from, + exact_match, + has_dbxref, + is_conjugate_base_of, + see_also, +) CHARLIE = Reference(prefix="orcid", identifier="0000-0003-4423-4370") @@ -946,7 +953,7 @@ def test_mappings(self) -> None: ) def test_default_relation(self): - """Test parsing DO's weird url prefixing.""" + """Test parsing a default relation.""" ontology = _read("""\ ontology: chebi @@ -956,6 +963,7 @@ def test_default_relation(self): """) term = self.get_only_term(ontology) self.assertEqual(1, len(term.relationships)) + self.assertIn(derives_from.reference, term.relationships) class TestVersionHandling(unittest.TestCase):