Skip to content

Commit

Permalink
Reader bug fixes - parent parsing and empty quotes (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Nov 24, 2024
1 parent d2d8d3d commit 86557b0
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 7 deletions.
24 changes: 18 additions & 6 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,11 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
alt_ids = list(iterate_node_alt_ids(data, strict=strict))
n_alt_ids += len(alt_ids)

parents = list(iterate_node_parents(data, node=reference, strict=strict))
parents = list(
iterate_node_parents(
data, node=reference, strict=strict, ontology_prefix=ontology_prefix
)
)
n_parents += len(parents)

synonyms = list(
Expand Down Expand Up @@ -377,6 +381,8 @@ def _extract_definition(

def get_first_nonescaped_quote(s: str) -> int | None:
"""Get the first non-escaped quote."""
if not s:
return None
if s[0] == '"':
# special case first position
return 0
Expand Down Expand Up @@ -523,7 +529,7 @@ def _handle_prop(
)
if obj_reference is None:
logger.warning(
"[%s:%s] could not parse object: %s", node.curie, prop_reference.curie, value_type
"[%s - %s] could not parse object: %s", node.curie, prop_reference.curie, value_type
)
return None
# TODO can we drop datatype from this?
Expand Down Expand Up @@ -579,10 +585,13 @@ def iterate_node_parents(
*,
node: Reference,
strict: bool = True,
ontology_prefix: str,
) -> Iterable[Reference]:
"""Extract parents from a :mod:`obonet` node's data."""
for parent_curie in data.get("is_a", []):
reference = Reference.from_curie(parent_curie, strict=strict)
reference = Reference.from_curie(
parent_curie, strict=strict, ontology_prefix=ontology_prefix, node=node
)
if reference is None:
logger.warning("[%s] could not parse parent curie: %s", node.curie, parent_curie)
continue
Expand Down Expand Up @@ -612,7 +621,9 @@ def iterate_node_relationships(
if relation_curie in RELATION_REMAPPINGS:
relation_prefix, relation_identifier = RELATION_REMAPPINGS[relation_curie]
else:
relation_prefix, relation_identifier = normalize_curie(relation_curie, strict=strict)
relation_prefix, relation_identifier = normalize_curie(
relation_curie, strict=strict, ontology_prefix=ontology_prefix, node=node
)
if relation_prefix is not None and relation_identifier is not None:
relation = Reference(prefix=relation_prefix, identifier=relation_identifier)
else:
Expand All @@ -623,8 +634,9 @@ def iterate_node_relationships(
relation.curie,
)

# TODO replace with omni-parser from :mod:`curies`
target = Reference.from_curie(target_curie, strict=strict)
target = Reference.from_curie(
target_curie, strict=strict, ontology_prefix=ontology_prefix, node=node
)
if target is None:
logger.warning("[%s] %s could not parse target %s", node.curie, relation, target_curie)
continue
Expand Down
6 changes: 5 additions & 1 deletion tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,11 @@ def test_get_node_properties(self):
def test_get_node_parents(self):
"""Test getting parents from a node in a :mod:`obonet` graph."""
data = self.graph.nodes["CHEBI:51990"]
parents = list(iterate_node_parents(data, node=Reference(prefix="chebi", identifier="XXX")))
parents = list(
iterate_node_parents(
data, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi"
)
)
self.assertEqual(2, len(parents))
self.assertEqual({"24060", "51992"}, {parent.identifier for parent in parents})
self.assertEqual({"chebi"}, {parent.prefix for parent in parents})
Expand Down
25 changes: 25 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class TestUtils(unittest.TestCase):

def test_first_nonescaped_quote(self):
"""Test finding the first non-escaped double quote."""
self.assertIsNone(get_first_nonescaped_quote(""))
self.assertEqual(0, get_first_nonescaped_quote('"'))
self.assertEqual(0, get_first_nonescaped_quote('"abc'))
self.assertEqual(0, get_first_nonescaped_quote('"abc"'))
Expand Down Expand Up @@ -726,3 +727,27 @@ def test_synonym_url(self) -> None:
],
synonym.provenance,
)

def test_parent(self) -> None:
    """Check that an ``is_a`` parent is read from both a CURIE and a PURL."""
    expected_parents = [Reference(prefix="CHEBI", identifier="5678")]

    # Case 1: the parent is written as a compact identifier (CURIE).
    curie_ontology = _read("""\
ontology: chebi
date: 20:11:2024 18:44
[Term]
id: CHEBI:1234
is_a: CHEBI:5678
""")
    curie_term = self.get_only_term(curie_ontology)
    self.assertEqual(expected_parents, curie_term.parents)

    # Case 2: the same parent is written as a full OBO PURL; the test
    # expects it to resolve to the identical reference as in case 1.
    purl_ontology = _read("""\
ontology: chebi
date: 20:11:2024 18:44
[Term]
id: CHEBI:1234
is_a: http://purl.obolibrary.org/obo/CHEBI_5678
""")
    purl_term = self.get_only_term(purl_ontology)
    self.assertEqual(expected_parents, purl_term.parents)

0 comments on commit 86557b0

Please sign in to comment.