From 86557b0e24c3de6089bdea64d90831bf0b4acf21 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Sun, 24 Nov 2024 13:34:25 +0100 Subject: [PATCH] Reader bug fixes - parent parsing and empty quotes (#245) --- src/pyobo/reader.py | 24 ++++++++++++++++++------ tests/test_get.py | 6 +++++- tests/test_reader.py | 25 +++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 7 deletions(-) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index afe52d35..c15dfb44 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -165,7 +165,11 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo: alt_ids = list(iterate_node_alt_ids(data, strict=strict)) n_alt_ids += len(alt_ids) - parents = list(iterate_node_parents(data, node=reference, strict=strict)) + parents = list( + iterate_node_parents( + data, node=reference, strict=strict, ontology_prefix=ontology_prefix + ) + ) n_parents += len(parents) synonyms = list( @@ -377,6 +381,8 @@ def _extract_definition( def get_first_nonescaped_quote(s: str) -> int | None: """Get the first non-escaped quote.""" + if not s: + return None if s[0] == '"': # special case first position return 0 @@ -523,7 +529,7 @@ def _handle_prop( ) if obj_reference is None: logger.warning( - "[%s:%s] could not parse object: %s", node.curie, prop_reference.curie, value_type + "[%s - %s] could not parse object: %s", node.curie, prop_reference.curie, value_type ) return None # TODO can we drop datatype from this? @@ -579,10 +585,13 @@ def iterate_node_parents( *, node: Reference, strict: bool = True, + ontology_prefix: str, ) -> Iterable[Reference]: """Extract parents from a :mod:`obonet` node's data.""" for parent_curie in data.get("is_a", []): - reference = Reference.from_curie(parent_curie, strict=strict) + reference = Reference.from_curie( + parent_curie, strict=strict, ontology_prefix=ontology_prefix, node=node + ) if reference is None: logger.warning("[%s] could not parse parent curie: %s", node.curie, parent_curie) continue @@ -612,7 +621,9 @@ def iterate_node_relationships( if relation_curie in RELATION_REMAPPINGS: relation_prefix, relation_identifier = RELATION_REMAPPINGS[relation_curie] else: - relation_prefix, relation_identifier = normalize_curie(relation_curie, strict=strict) + relation_prefix, relation_identifier = normalize_curie( + relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node + ) if relation_prefix is not None and relation_identifier is not None: relation = Reference(prefix=relation_prefix, identifier=relation_identifier) else: @@ -623,8 +634,9 @@ def iterate_node_relationships( relation.curie, ) - # TODO replace with omni-parser from :mod:`curies` - target = Reference.from_curie(target_curie, strict=strict) + target = Reference.from_curie( + target_curie, strict=strict, ontology_prefix=ontology_prefix, node=node + ) if target is None: logger.warning("[%s] %s could not parse target %s", node.curie, relation, target_curie) continue diff --git a/tests/test_get.py b/tests/test_get.py index 86587025..2bcb615a 100644 --- a/tests/test_get.py +++ b/tests/test_get.py @@ -203,7 +203,11 @@ def test_get_node_properties(self): def test_get_node_parents(self): """Test getting parents from a node in a :mod:`obonet` graph.""" data = self.graph.nodes["CHEBI:51990"] - parents = list(iterate_node_parents(data, node=Reference(prefix="chebi", identifier="XXX"))) + parents = list( + iterate_node_parents( + data, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi" + ) + ) self.assertEqual(2, len(parents)) self.assertEqual({"24060", "51992"}, {parent.identifier for parent in parents}) self.assertEqual({"chebi"}, {parent.prefix for parent in parents}) diff --git a/tests/test_reader.py b/tests/test_reader.py index eb3476fc..6b8c792a 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -30,6 +30,7 @@ class TestUtils(unittest.TestCase): def test_first_nonescaped_quote(self): """Test finding the first non-escaped double quote.""" + self.assertIsNone(get_first_nonescaped_quote("")) self.assertEqual(0, get_first_nonescaped_quote('"')) self.assertEqual(0, get_first_nonescaped_quote('"abc')) self.assertEqual(0, get_first_nonescaped_quote('"abc"')) @@ -726,3 +727,27 @@ def test_synonym_url(self) -> None: ], synonym.provenance, ) + + def test_parent(self) -> None: + """Test parsing out a parent.""" + ontology = _read("""\ + ontology: chebi + date: 20:11:2024 18:44 + + [Term] + id: CHEBI:1234 + is_a: CHEBI:5678 + """) + term = self.get_only_term(ontology) + self.assertEqual([Reference(prefix="CHEBI", identifier="5678")], term.parents) + + ontology = _read("""\ + ontology: chebi + date: 20:11:2024 18:44 + + [Term] + id: CHEBI:1234 + is_a: http://purl.obolibrary.org/obo/CHEBI_5678 + """) + term = self.get_only_term(ontology) + self.assertEqual([Reference(prefix="CHEBI", identifier="5678")], term.parents)