Skip to content

Commit

Permalink
Streamline property parsing (#244)
Browse files Browse the repository at this point in the history
1. Refactor the parsing of a single property_value into its own function to reduce code complexity
2. Reorder the checks so that the value is tested for being a reference before a datatype is looked for,
which avoids a misleading "missing datatype" warning when parsing a reference
  • Loading branch information
cthoyt authored Nov 23, 2024
1 parent 0f99f7a commit 3a4fadc
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 36 deletions.
80 changes: 50 additions & 30 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,41 +496,61 @@ def iterate_node_properties(
) -> Iterable[ObjectProperty | LiteralProperty]:
"""Extract properties from a :mod:`obonet` node's data."""
for prop_value_type in data.get("property_value", []):
try:
prop, value_type = prop_value_type.split(" ", 1)
except ValueError:
logger.warning("[%s] malformed property_value: %s", node.curie, prop_value_type)
continue
if yv := _handle_prop(
prop_value_type, node=node, strict=strict, ontology_prefix=ontology_prefix
):
yield yv

prop_reference = _get_prop(prop, node=node, strict=strict, ontology_prefix=ontology_prefix)
if prop_reference is None:
logger.warning("[%s] unparsable property: %s", node.curie, prop)
continue

try:
value, datatype = value_type.rsplit(" ", 1) # second entry is the value type
except ValueError:
logger.warning(f"property missing datatype. defaulting to string - {prop_value_type}")
value = value_type # could assign type to be 'xsd:string' by default
datatype = None
def _handle_prop(
prop_value_type: str, *, node: Reference, strict: bool = True, ontology_prefix: str
) -> ObjectProperty | LiteralProperty | None:
try:
prop, value_type = prop_value_type.split(" ", 1)
except ValueError:
logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type)
return None

if datatype:
datatype = Reference.from_curie(
datatype, strict=strict, ontology_prefix=ontology_prefix, node=node
)
prop_reference = _get_prop(prop, node=node, strict=strict, ontology_prefix=ontology_prefix)
if prop_reference is None:
logger.warning("[%s] unparsable property: %s", node.curie, prop)
return None

if value.startswith('"'):
# this is a literal value
value = value.strip('"')
yield LiteralProperty(prop_reference, value, datatype)
else:
obj = Reference.from_curie(
value, strict=strict, ontology_prefix=ontology_prefix, node=node
# if the value doesn't start with a quote, we're going to
# assume that it's a reference
if not value_type.startswith('"'):
obj_reference = Reference.from_curie(
value_type, strict=strict, ontology_prefix=ontology_prefix, node=node
)
if obj_reference is None:
logger.warning(
"[%s:%s] could not parse object: %s", node.curie, prop_reference.curie, value_type
)
if obj is None:
logger.warning("[%s] could not parse object: %s", node.curie, value)
continue
yield ObjectProperty(prop_reference, obj, datatype)
return None
# TODO can we drop datatype from this?
return ObjectProperty(prop_reference, obj_reference, None)

try:
value, datatype = value_type.rsplit(" ", 1) # second entry is the value type
except ValueError:
logger.warning(
"[%s] property missing datatype. defaulting to string - %s", node.curie, prop_value_type
)
value = value_type
datatype = ""

value = value.strip('"')

if not datatype:
return LiteralProperty(prop_reference, value, Reference(prefix="xsd", identifier="string"))

datatype_reference = Reference.from_curie(
datatype, strict=strict, ontology_prefix=ontology_prefix, node=node
)
if datatype_reference is None:
logger.warning("[%s] had unparsable datatype %s", node.curie, prop_value_type)
return None
return LiteralProperty(prop_reference, value, datatype_reference)


def _get_prop(
Expand Down
46 changes: 40 additions & 6 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,22 @@ def test_property_literal_typed(self) -> None:
self.assertEqual("121.323", row["value"])
self.assertEqual("xsd:decimal", row["datatype"])

def test_property_bad_datatype(self) -> None:
"""Test parsing a property with an unparsable datatype."""
text = """\
ontology: chebi
[Term]
id: CHEBI:1234
property_value: mass "121.323" NOPE:NOPE
"""
with self.assertRaises(ValueError):
_read(text)
ontology = _read(text, strict=False)
term = self.get_only_term(ontology)
self.assertEqual(0, len(term.annotations_literal))
self.assertEqual(0, len(term.annotations_object))

def test_property_literal_url_questionable(self) -> None:
"""Test parsing a property with a literal object."""
ontology = _read("""\
Expand Down Expand Up @@ -340,6 +356,24 @@ def test_property_literal_url(self) -> None:
self.assertEqual(1, len(list(term.annotations_object)))
self.assertEqual("CHEBI:5678", term.get_property(td))

def test_property_unparsable_object(self) -> None:
"""Test when an object can't be parsed."""
text = """\
ontology: chebi
[Term]
id: CHEBI:1234
property_value: https://w3id.org/biolink/vocab/something NOPE:NOPE
"""

with self.assertRaises(ValueError):
_read(text)

ontology = _read(text, strict=False)
term = self.get_only_term(ontology)
self.assertEqual(0, len(list(term.annotations_literal)))
self.assertEqual(0, len(list(term.annotations_object)))

def test_property_literal_url_unregistered(self) -> None:
"""Test using a full OBO PURL as the property."""
ontology = _read("""\
Expand Down Expand Up @@ -368,16 +402,16 @@ def test_property_literal_object(self) -> None:
self.assertEqual("hgnc:1234", term.get_property(see_also))

def test_node_unparsable(self) -> None:
"""Test loading an ontology with unparsable nodes.."""
ontology = _read(
"""\
"""Test loading an ontology with unparsable nodes."""
text = """\
ontology: chebi
[Term]
id: nope:1234
""",
strict=False,
)
"""
with self.assertRaises(ValueError):
_read(text)
ontology = _read(text, strict=False)
self.assertEqual(0, len(list(ontology.iter_terms())))

def test_malformed_typedef(self) -> None:
Expand Down

0 comments on commit 3a4fadc

Please sign in to comment.