Skip to content

Commit

Permalink
Simplify node iteration
Browse files (browse the repository at this point in the history)
  • Loading branch information
cthoyt committed Nov 25, 2024
1 parent c6c09c9 commit d3c9752
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 35 deletions.
42 changes: 12 additions & 30 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,22 +113,6 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
f"[{ontology_prefix}] will not accept slash in data version: {data_version}"
)

#: Parsed CURIEs to references (even external ones)
reference_it = (
Reference(
prefix=prefix,
identifier=bioregistry.standardize_identifier(prefix, identifier),
# if name isn't available, it means its external to this ontology
name=data.get("name"),
)
for prefix, identifier, data in _iter_obo_graph(
graph=graph, strict=strict, ontology_prefix=ontology_prefix
)
)
references: Mapping[ReferenceTuple, Reference] = {
reference.pair: reference for reference in reference_it
}

#: CURIEs to typedefs
typedefs: Mapping[ReferenceTuple, TypeDef] = {
typedef.pair: typedef
Expand All @@ -145,18 +129,16 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
missing_typedefs: set[ReferenceTuple] = set()
terms = []
n_alt_ids, n_parents, n_synonyms, n_relations, n_properties, n_xrefs = 0, 0, 0, 0, 0, 0
for prefix, identifier, data in _iter_obo_graph(
n_references = 0
for reference, data in _iter_obo_graph(
graph=graph, strict=strict, ontology_prefix=ontology_prefix
):
if prefix != ontology_prefix or not data:
if reference.prefix != ontology_prefix or not data:
continue

identifier = bioregistry.standardize_identifier(prefix, identifier)
reference = references[ReferenceTuple(ontology_prefix, identifier)]
n_references += 1

node_xrefs = list(
iterate_node_xrefs(
prefix=prefix,
data=data,
strict=strict,
ontology_prefix=ontology_prefix,
Expand Down Expand Up @@ -245,7 +227,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
terms.append(term)

logger.info(
f"[{ontology_prefix}] got {len(references):,} references, {len(typedefs):,} typedefs, {len(terms):,} terms,"
f"[{ontology_prefix}] got {n_references:,} references, {len(typedefs):,} typedefs, {len(terms):,} terms,"
f" {n_alt_ids:,} alt ids, {n_parents:,} parents, {n_synonyms:,} synonyms, {n_xrefs:,} xrefs,"
f" {n_relations:,} relations, and {n_properties:,} properties",
)
Expand Down Expand Up @@ -282,13 +264,14 @@ def _iter_obo_graph(
*,
strict: bool = True,
ontology_prefix: str | None = None,
) -> Iterable[tuple[str, str, Mapping[str, Any]]]:
) -> Iterable[tuple[Reference, Mapping[str, Any]]]:
"""Iterate over the nodes in the graph with the prefix stripped (if it's there)."""
for node, data in graph.nodes(data=True):
prefix, identifier = normalize_curie(node, strict=strict, ontology_prefix=ontology_prefix)
if prefix is None or identifier is None:
continue
yield prefix, identifier, data
node = Reference.from_curie_or_uri(
node, strict=strict, ontology_prefix=ontology_prefix, name=data.get("name")
)
if node:
yield node, data


def _get_date(graph, ontology_prefix: str) -> datetime | None:
Expand Down Expand Up @@ -698,7 +681,6 @@ def iterate_node_relationships(

def iterate_node_xrefs(
*,
prefix: str,
data: Mapping[str, Any],
strict: bool = True,
ontology_prefix: str | None,
Expand All @@ -717,7 +699,7 @@ def iterate_node_xrefs(
if split_space:
_xref_split = xref.split(" ", 1)
if _xref_split[1][0] not in {'"', "("}:
logger.debug("[%s] Problem with space in xref %s", prefix, xref)
logger.debug("[%s] Problem with space in xref %s", node.curie, xref)
continue
xref = _xref_split[0]

Expand Down
3 changes: 3 additions & 0 deletions src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ def from_curie_or_uri(
)
if prefix is None or identifier is None:
return None

identifier = bioregistry.standardize_identifier(prefix, identifier)

if name is None and auto:
from ..api import get_name

Expand Down
25 changes: 20 additions & 5 deletions tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_extract_definition(self):
]:
with self.subTest(s=s):
actual_text, actual_references = _extract_definition(
s, node=Reference(prefix="chebi", identifier="XXX")
s, node=Reference(prefix="chebi", identifier="XXX"), ontology_prefix="chebi"
)
self.assertEqual(expected_text, actual_text)
self.assertEqual(expected_references, actual_references)
Expand All @@ -95,7 +95,10 @@ def test_extract_definition_with_escapes(self):
expected_text = """The canonical 3' splice site has the sequence "AG"."""
s = """"The canonical 3' splice site has the sequence \\"AG\\"." [PMID:1234]"""
actual_text, actual_references = _extract_definition(
s, strict=True, node=Reference(prefix="chebi", identifier="XXX")
s,
strict=True,
node=Reference(prefix="chebi", identifier="XXX"),
ontology_prefix="chebi",
)
self.assertEqual(expected_text, actual_text)
self.assertEqual([Reference(prefix="pubmed", identifier="1234")], actual_references)
Expand Down Expand Up @@ -159,7 +162,10 @@ def test_extract_synonym(self):
]:
with self.subTest(s=text):
actual_synonym = _extract_synonym(
text, synoynym_typedefs, node=Reference(prefix="chebi", identifier="XXX")
text,
synoynym_typedefs,
node=Reference(prefix="chebi", identifier="XXX"),
ontology_prefix="chebi",
)
self.assertIsInstance(actual_synonym, Synonym)
self.assertEqual(expected_synonym, actual_synonym)
Expand All @@ -175,7 +181,10 @@ def test_get_node_synonyms(self):
data = self.graph.nodes["CHEBI:51990"]
synonyms = list(
iterate_node_synonyms(
data, synoynym_typedefs, node=Reference(prefix="chebi", identifier="XXX")
data,
synoynym_typedefs,
node=Reference(prefix="chebi", identifier="XXX"),
ontology_prefix="chebi",
)
)
self.assertEqual(1, len(synonyms))
Expand Down Expand Up @@ -215,7 +224,13 @@ def test_get_node_parents(self):
def test_get_node_xrefs(self):
"""Test getting parents from a node in a :mod:`obonet` graph."""
data = self.graph.nodes["CHEBI:51990"]
xrefs = list(iterate_node_xrefs(prefix="chebi", data=data))
xrefs = list(
iterate_node_xrefs(
data=data,
ontology_prefix="chebi",
node=Reference(prefix="chebi", identifier="51990"),
)
)
self.assertEqual(7, len(xrefs))
# NOTE the prefixes are remapped by Bioregistry
self.assertEqual({"pubmed", "cas", "reaxys"}, {xref.prefix for xref in xrefs})
Expand Down

0 comments on commit d3c9752

Please sign in to comment.