Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for terms and ontology reading #235

Merged
merged 7 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@
RELATION_REMAPPINGS: Mapping[str, ReferenceTuple] = bioontologies.upgrade.load()


def from_obo_path(
path: str | Path, prefix: str | None = None, *, strict: bool = True, **kwargs
) -> Obo:
def from_obo_path(path: str | Path, prefix: str | None = None, *, strict: bool = True) -> Obo:
"""Get the OBO graph from a path."""
import obonet

Expand All @@ -67,7 +65,7 @@ def from_obo_path(
_clean_graph_ontology(graph, prefix)

# Convert to an Obo instance and return
return from_obonet(graph, strict=strict, **kwargs)
return from_obonet(graph, strict=strict)


def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
Expand Down Expand Up @@ -207,7 +205,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
continue
n_relations += 1
term.append_relationship(typedef, reference)
for prop, value in iterate_node_properties(data, term=term):
for prop, value, _is_literal in iterate_node_properties(data, term=term):
n_properties += 1
term.append_property(prop, value)
terms.append(term)
Expand Down Expand Up @@ -315,7 +313,7 @@ def iterate_graph_typedefs(
elif "identifier" in typedef:
curie = typedef["identifier"]
else:
raise KeyError
raise KeyError("typedef is missing an `id`")

name = typedef.get("name")
if name is None:
Expand Down Expand Up @@ -353,23 +351,30 @@ def _extract_definition(
) -> tuple[None, None] | tuple[str, list[Reference]]:
"""Extract the definitions."""
if not s.startswith('"'):
raise ValueError("definition does not start with a quote")
logger.warning(f"[{node.curie}] definition does not start with a quote")
return None, None

try:
definition, rest = _quote_split(s)
except ValueError:
logger.warning("[%s] could not parse definition: %s", node.curie, s)
except ValueError as e:
logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
return None, None

if not rest.startswith("[") or not rest.endswith("]"):
logger.warning("[%s] problem with definition: %s", node.curie, s)
logger.warning(
"[%s] missing square brackets in rest of: %s (rest = `%s`)", node.curie, s, rest
)
provenance = []
else:
provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
return definition, provenance


def _get_first_nonquoted(s: str) -> int | None:
def get_first_nonescaped_quote(s: str) -> int | None:
"""Get the first non-escaped quote."""
if s[0] == '"':
# special case first position
return 0
for i, (a, b) in enumerate(pairwise(s), start=1):
if b == '"' and a != "\\":
return i
Expand All @@ -378,9 +383,9 @@ def _get_first_nonquoted(s: str) -> int | None:

def _quote_split(s: str) -> tuple[str, str]:
s = s.lstrip('"')
i = _get_first_nonquoted(s)
i = get_first_nonescaped_quote(s)
if i is None:
raise ValueError
raise ValueError(f"no closing quote found in `{s}`")
return _clean_definition(s[:i].strip()), s[i + 1 :].strip()


Expand Down Expand Up @@ -431,10 +436,9 @@ def _extract_synonym(
break

if not rest.startswith("[") or not rest.endswith("]"):
logger.warning("[%s] problem with synonym: %s", node.curie, s)
return None

provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
provenance = []
else:
provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
return Synonym(
name=name,
specificity=specificity or "EXACT",
Expand Down Expand Up @@ -480,25 +484,27 @@ def iterate_node_synonyms(


def iterate_node_properties(
data: Mapping[str, Any], *, property_prefix: str | None = None, term=None
) -> Iterable[tuple[str, str]]:
data: Mapping[str, Any], *, term=None
) -> Iterable[tuple[str, str, bool]]:
"""Extract properties from a :mod:`obonet` node's data."""
for prop_value_type in data.get("property_value", []):
try:
prop, value_type = prop_value_type.split(" ", 1)
except ValueError:
logger.info("malformed property: %s on %s", prop_value_type, term and term.curie)
continue
if property_prefix is not None and prop.startswith(property_prefix):
prop = prop[len(property_prefix) :]

try:
value, _ = value_type.rsplit(" ", 1) # second entry is the value type
except ValueError:
# logger.debug(f'property missing datatype. defaulting to string - {prop_value_type}')
value = value_type # could assign type to be 'xsd:string' by default
value = value.strip('"')
yield prop, value

if value.startswith('"'):
# this is a literal value
value = value.strip('"')
yield prop, value, True
else:
yield prop, value, False


def iterate_node_parents(
Expand Down
34 changes: 25 additions & 9 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,17 +268,26 @@ def append_synonym(
*,
type: SynonymTypeDef | None = None,
specificity: SynonymSpecificity | None = None,
provenance: list[Reference] | None = None,
) -> None:
"""Add a synonym."""
if isinstance(synonym, str):
synonym = Synonym(
synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
synonym,
type=type or DEFAULT_SYNONYM_TYPE,
specificity=specificity or "EXACT",
provenance=provenance or [],
)
self.synonyms.append(synonym)

def append_alt(self, alt: str | Reference) -> None:
"""Add an alternative identifier."""
if isinstance(alt, str):
warnings.warn(
"use fully qualified reference when appending alts",
DeprecationWarning,
stacklevel=2,
)
alt = Reference(prefix=self.prefix, identifier=alt)
self.alt_ids.append(alt)

Expand All @@ -293,7 +302,7 @@ def append_see_also(self, reference: ReferenceHint) -> Self:
# a literal string. otherwise, raise the error again
if isinstance(reference, str):
return self.annotate_literal(see_also, reference)
raise
raise RuntimeError from None # this shouldn't/can't happen?
else:
return self.annotate_object(see_also, _reference)

Expand Down Expand Up @@ -321,6 +330,10 @@ def extend_parents(self, references: Collection[Reference]) -> None:

def get_properties(self, prop) -> list[str]:
"""Get properties from the given key."""
# FIXME this instance check should be removed when
# improving property handling
if isinstance(prop, curies.Reference | Referenced):
prop = prop.curie
return self.properties[prop]

def get_property(self, prop) -> str | None:
Expand Down Expand Up @@ -349,7 +362,6 @@ def append_exact_match(self, reference: ReferenceHint) -> Self:
"""Append an exact match, also adding an xref."""
reference = _ensure_ref(reference)
self.annotate_object(exact_match, reference)
self.append_xref(reference)
return self

def append_xref(self, reference: ReferenceHint) -> None:
Expand Down Expand Up @@ -458,6 +470,11 @@ def iterate_obo_lines(

if self.definition:
yield f"def: {self._definition_fp()}"
elif self.provenance:
logger.warning("%s has provenance but no definition, can't write", self.curie)

for alt in sorted(self.alt_ids):
yield f"alt_id: {alt}" # __str__ bakes in the ! name

for xref in sorted(self.xrefs):
yield f"xref: {xref}" # __str__ bakes in the ! name
Expand All @@ -479,17 +496,16 @@ def iterate_obo_lines(
def _emit_relations(
self, ontology_prefix: str, typedefs: dict[ReferenceTuple, TypeDef]
) -> Iterable[str]:
for typedef, references in sorted(self.relationships.items()):
for typedef, reference in self.iterate_relations():
_typedef_warn(ontology_prefix, typedef.reference, typedefs)
for reference in sorted(references):
s = f"relationship: {typedef.preferred_curie} {reference.preferred_curie}"
if typedef.name or reference.name:
s += " !"
s = f"relationship: {typedef.preferred_curie} {reference.preferred_curie}"
if typedef.name or reference.name:
s += " !"
if typedef.name:
s += f" {typedef.name}"
if reference.name:
s += f" {reference.name}"
yield s
yield s

def _emit_properties(self, typedefs: dict[ReferenceTuple, TypeDef]) -> Iterable[str]:
for prop, value in sorted(self.iterate_properties(), key=_sort_properties):
Expand Down
6 changes: 3 additions & 3 deletions tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ def test_get_node_properties(self):
data = self.graph.nodes["CHEBI:51990"]
properties = list(iterate_node_properties(data))
t_prop = "http://purl.obolibrary.org/obo/chebi/monoisotopicmass"
self.assertIn(t_prop, {prop for prop, value in properties})
self.assertEqual(1, sum(prop == t_prop for prop, value in properties))
value = next(value for prop, value in properties if prop == t_prop)
self.assertIn(t_prop, {prop for prop, value, _ in properties})
self.assertEqual(1, sum(prop == t_prop for prop, value, _ in properties))
value = next(value for prop, value, _ in properties if prop == t_prop)
self.assertEqual("261.28318", value)

def test_get_node_parents(self):
Expand Down
Loading