diff --git a/src/pyobo/identifier_utils.py b/src/pyobo/identifier_utils.py index 7c6f14c2..7c25ac4b 100644 --- a/src/pyobo/identifier_utils.py +++ b/src/pyobo/identifier_utils.py @@ -6,6 +6,7 @@ from functools import wraps from typing import ClassVar +import bioontologies.upgrade import bioregistry from curies import Reference, ReferenceTuple @@ -74,7 +75,7 @@ def normalize_curie( strict: bool = True, ontology_prefix: str | None = None, node: Reference | None = None, -) -> tuple[str, str] | tuple[None, None]: +) -> ReferenceTuple | tuple[None, None]: """Parse a string that looks like a CURIE. :param curie: A compact uniform resource identifier (CURIE) @@ -98,6 +99,9 @@ def normalize_curie( if curie_has_blacklisted_suffix(curie): return None, None + if reference_t := bioontologies.upgrade.upgrade(curie): + return reference_t + if curie.startswith("http:") or curie.startswith("https:"): if reference := parse_iri(curie): return reference.pair @@ -120,7 +124,7 @@ def normalize_curie( norm_node_prefix = bioregistry.normalize_prefix(prefix) if norm_node_prefix: - return norm_node_prefix, identifier + return ReferenceTuple(norm_node_prefix, identifier) elif strict: raise MissingPrefixError(curie=curie, ontology_prefix=ontology_prefix, node=node) else: diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 4864c059..05b8ee2f 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -9,7 +9,6 @@ from pathlib import Path from typing import Any -import bioontologies.upgrade import bioregistry import networkx as nx from curies import ReferenceTuple @@ -17,7 +16,6 @@ from tqdm.auto import tqdm from .constants import DATE_FORMAT, PROVENANCE_PREFIXES -from .identifier_utils import normalize_curie from .registries import curie_has_blacklisted_prefix, curie_is_blacklisted, remap_prefix from .struct import ( Obo, @@ -42,8 +40,6 @@ logger = logging.getLogger(__name__) -RELATION_REMAPPINGS: Mapping[str, ReferenceTuple] = bioontologies.upgrade.load() - def from_obo_path( path: str | Path, @@ -363,7 +359,9 @@ def iterate_graph_synonym_typedefs( if reference is not None: yield SynonymTypeDef(reference=reference) elif strict: - raise ValueError(f"Could not parse {sid}") + raise ValueError( + f"[{ontology_prefix}] could not parse synonym type definition: {sid}" + ) else: continue @@ -657,15 +655,7 @@ def _get_prop( if prop.startswith(sw): identifier = prop.removeprefix(sw) return default_reference(ontology_prefix, identifier) - if prop.startswith("http"): - # TODO upstream this into an omni-parser for references? - _pref, _id = bioregistry.parse_iri(prop) - if _pref and _id: - return Reference(prefix=_pref, identifier=_id) - else: - logger.warning("[%s] unable to handle property: %s", node.curie, prop) - return None - elif ":" not in prop: + if ":" not in prop: return default_reference(ontology_prefix, prop) else: return Reference.from_curie_or_uri( @@ -713,16 +703,11 @@ def iterate_node_relationships( """Extract relationships from a :mod:`obonet` node's data.""" for s in data.get("relationship", []): relation_curie, target_curie = s.split(" ") - relation_prefix: str | None - relation_identifier: str | None - if relation_curie in RELATION_REMAPPINGS: - relation_prefix, relation_identifier = RELATION_REMAPPINGS[relation_curie] - else: - relation_prefix, relation_identifier = normalize_curie( + + if ":" in relation_curie: + relation = Reference.from_curie_or_uri( relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node ) - if relation_prefix is not None and relation_identifier is not None: - relation = Reference(prefix=relation_prefix, identifier=relation_identifier) else: relation = default_reference(ontology_prefix, relation_curie) logger.debug( @@ -730,6 +715,9 @@ def iterate_node_relationships( relation_curie, relation.curie, ) + if relation is None: + logger.warning("[%s] could not parse relation %s", node.curie, relation_curie) + continue target = Reference.from_curie_or_uri( target_curie, strict=strict, ontology_prefix=ontology_prefix, node=node diff --git a/tests/test_reader.py b/tests/test_reader.py index aacd5851..026741c8 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -8,6 +8,7 @@ from obonet import read_obo from pyobo import Obo, Reference, Term +from pyobo.identifier_utils import UnparsableIRIError from pyobo.reader import from_obonet, get_first_nonescaped_quote from pyobo.struct import default_reference from pyobo.struct.struct import DEFAULT_SYNONYM_TYPE @@ -416,13 +417,29 @@ def test_property_unparsable_object(self) -> None: def test_property_literal_url_unregistered(self) -> None: """Test using a full OBO PURL as the property.""" - ontology = _read("""\ + with self.assertRaises(UnparsableIRIError): + _read( + """\ + ontology: chebi + + [Term] + id: CHEBI:1234 + property_value: https://example.com/nope/nope CHEBI:5678 + """, + strict=True, + ) + + ontology = _read( + """\ ontology: chebi [Term] id: CHEBI:1234 property_value: https://example.com/nope/nope CHEBI:5678 - """) + """, + strict=False, + ) + term = self.get_only_term(ontology) self.assertEqual(0, len(list(term.annotations_literal))) self.assertEqual(0, len(list(term.annotations_object)))