diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index 65f3331f..784a03cb 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -496,41 +496,61 @@ def iterate_node_properties( ) -> Iterable[ObjectProperty | LiteralProperty]: """Extract properties from a :mod:`obonet` node's data.""" for prop_value_type in data.get("property_value", []): - try: - prop, value_type = prop_value_type.split(" ", 1) - except ValueError: - logger.warning("[%s] malformed property_value: %s", node.curie, prop_value_type) - continue + if yv := _handle_prop( + prop_value_type, node=node, strict=strict, ontology_prefix=ontology_prefix + ): + yield yv - prop_reference = _get_prop(prop, node=node, strict=strict, ontology_prefix=ontology_prefix) - if prop_reference is None: - logger.warning("[%s] unparsable property: %s", node.curie, prop) - continue - try: - value, datatype = value_type.rsplit(" ", 1) # second entry is the value type - except ValueError: - logger.warning(f"property missing datatype. defaulting to string - {prop_value_type}") - value = value_type # could assign type to be 'xsd:string' by default - datatype = None +def _handle_prop( + prop_value_type: str, *, node: Reference, strict: bool = True, ontology_prefix: str +) -> ObjectProperty | LiteralProperty | None: + try: + prop, value_type = prop_value_type.split(" ", 1) + except ValueError: + logger.warning("[%s] property_value is missing a space: %s", node.curie, prop_value_type) + return None - if datatype: - datatype = Reference.from_curie( - datatype, strict=strict, ontology_prefix=ontology_prefix, node=node - ) + prop_reference = _get_prop(prop, node=node, strict=strict, ontology_prefix=ontology_prefix) + if prop_reference is None: + logger.warning("[%s] unparsable property: %s", node.curie, prop) + return None - if value.startswith('"'): - # this is a literal value - value = value.strip('"') - yield LiteralProperty(prop_reference, value, datatype) - else: - obj = Reference.from_curie( - value, strict=strict, ontology_prefix=ontology_prefix, node=node + # if the value doesn't start with a quote, we're going to + # assume that it's a reference + if not value_type.startswith('"'): + obj_reference = Reference.from_curie( + value_type, strict=strict, ontology_prefix=ontology_prefix, node=node + ) + if obj_reference is None: + logger.warning( + "[%s:%s] could not parse object: %s", node.curie, prop_reference.curie, value_type ) - if obj is None: - logger.warning("[%s] could not parse object: %s", node.curie, value) - continue - yield ObjectProperty(prop_reference, obj, datatype) + return None + # TODO can we drop datatype from this? + return ObjectProperty(prop_reference, obj_reference, None) + + try: + value, datatype = value_type.rsplit(" ", 1) # second entry is the value type + except ValueError: + logger.warning( + "[%s] property missing datatype. defaulting to string - %s", node.curie, prop_value_type + ) + value = value_type + datatype = "" + + value = value.strip('"') + + if not datatype: + return LiteralProperty(prop_reference, value, Reference(prefix="xsd", identifier="string")) + + datatype_reference = Reference.from_curie( + datatype, strict=strict, ontology_prefix=ontology_prefix, node=node + ) + if datatype_reference is None: + logger.warning("[%s] had unparsable datatype %s", node.curie, prop_value_type) + return None + return LiteralProperty(prop_reference, value, datatype_reference) def _get_prop( diff --git a/tests/test_reader.py b/tests/test_reader.py index 01a9b767..28f4884d 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -258,6 +258,22 @@ def test_property_literal_typed(self) -> None: self.assertEqual("121.323", row["value"]) self.assertEqual("xsd:decimal", row["datatype"]) + def test_property_bad_datatype(self) -> None: + """Test parsing a property with an unparsable datatype.""" + text = """\ + ontology: chebi + + [Term] + id: CHEBI:1234 + property_value: mass "121.323" NOPE:NOPE + """ + with self.assertRaises(ValueError): + _read(text) + ontology = _read(text, strict=False) + term = self.get_only_term(ontology) + self.assertEqual(0, len(term.annotations_literal)) + self.assertEqual(0, len(term.annotations_object)) + def test_property_literal_url_questionable(self) -> None: """Test parsing a property with a literal object.""" ontology = _read("""\ @@ -340,6 +356,24 @@ def test_property_literal_url(self) -> None: self.assertEqual(1, len(list(term.annotations_object))) self.assertEqual("CHEBI:5678", term.get_property(td)) + def test_property_unparsable_object(self) -> None: + """Test when an object can't be parsed.""" + text = """\ + ontology: chebi + + [Term] + id: CHEBI:1234 + property_value: https://w3id.org/biolink/vocab/something NOPE:NOPE + """ + + with self.assertRaises(ValueError): + _read(text) + + ontology = _read(text, strict=False) + term = self.get_only_term(ontology) + self.assertEqual(0, len(list(term.annotations_literal))) + self.assertEqual(0, len(list(term.annotations_object))) + def test_property_literal_url_unregistered(self) -> None: """Test using a full OBO PURL as the property.""" ontology = _read("""\ @@ -368,16 +402,16 @@ def test_property_literal_object(self) -> None: self.assertEqual("hgnc:1234", term.get_property(see_also)) def test_node_unparsable(self) -> None: - """Test loading an ontology with unparsable nodes..""" - ontology = _read( - """\ + """Test loading an ontology with unparsable nodes.""" + text = """\ ontology: chebi [Term] id: nope:1234 - """, - strict=False, - ) + """ + with self.assertRaises(ValueError): + _read(text) + ontology = _read(text, strict=False) self.assertEqual(0, len(list(ontology.iter_terms()))) def test_malformed_typedef(self) -> None: