biopragmatics · cthoyt · Nov 21, 2024 · Nov 20, 2024 · Nov 20, 2024 · Nov 20, 2024
diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py
@@ -44,9 +44,7 @@
 RELATION_REMAPPINGS: Mapping[str, ReferenceTuple] = bioontologies.upgrade.load()
 
 
-def from_obo_path(
-    path: str | Path, prefix: str | None = None, *, strict: bool = True, **kwargs
-) -> Obo:
+def from_obo_path(path: str | Path, prefix: str | None = None, *, strict: bool = True) -> Obo:
     """Get the OBO graph from a path."""
     import obonet
 
@@ -67,7 +65,7 @@ def from_obo_path(
         _clean_graph_ontology(graph, prefix)
 
     # Convert to an Obo instance and return
-    return from_obonet(graph, strict=strict, **kwargs)
+    return from_obonet(graph, strict=strict)
 
 
 def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
@@ -207,7 +205,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
                 continue
             n_relations += 1
             term.append_relationship(typedef, reference)
-        for prop, value in iterate_node_properties(data, term=term):
+        for prop, value, _is_literal in iterate_node_properties(data, term=term):
             n_properties += 1
             term.append_property(prop, value)
         terms.append(term)
@@ -315,7 +313,7 @@ def iterate_graph_typedefs(
         elif "identifier" in typedef:
             curie = typedef["identifier"]
         else:
-            raise KeyError
+            raise KeyError("typedef is missing an `id`")
 
         name = typedef.get("name")
         if name is None:
@@ -353,23 +351,30 @@ def _extract_definition(
 ) -> tuple[None, None] | tuple[str, list[Reference]]:
     """Extract the definitions."""
     if not s.startswith('"'):
-        raise ValueError("definition does not start with a quote")
+        logger.warning(f"[{node.curie}] definition does not start with a quote")
+        return None, None
 
     try:
         definition, rest = _quote_split(s)
-    except ValueError:
-        logger.warning("[%s] could not parse definition: %s", node.curie, s)
+    except ValueError as e:
+        logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
         return None, None
 
     if not rest.startswith("[") or not rest.endswith("]"):
-        logger.warning("[%s] problem with definition: %s", node.curie, s)
+        logger.warning(
+            "[%s] missing square brackets in rest of: %s (rest = `%s`)", node.curie, s, rest
+        )
         provenance = []
     else:
         provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
     return definition, provenance
 
 
-def _get_first_nonquoted(s: str) -> int | None:
+def get_first_nonescaped_quote(s: str) -> int | None:
+    """Get the first non-escaped quote."""
+    if s[0] == '"':
+        # special case first position
+        return 0
     for i, (a, b) in enumerate(pairwise(s), start=1):
         if b == '"' and a != "\\":
             return i
@@ -378,9 +383,9 @@ def _get_first_nonquoted(s: str) -> int | None:
 
 def _quote_split(s: str) -> tuple[str, str]:
     s = s.lstrip('"')
-    i = _get_first_nonquoted(s)
+    i = get_first_nonescaped_quote(s) if s else None  # guard "": the helper indexes s[0]
     if i is None:
-        raise ValueError
+        raise ValueError(f"no closing quote found in `{s}`")
     return _clean_definition(s[:i].strip()), s[i + 1 :].strip()
 
 
@@ -431,10 +436,9 @@ def _extract_synonym(
             break
 
     if not rest.startswith("[") or not rest.endswith("]"):
-        logger.warning("[%s] problem with synonym: %s", node.curie, s)
-        return None
-
-    provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
+        provenance = []
+    else:
+        provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
     return Synonym(
         name=name,
         specificity=specificity or "EXACT",
@@ -480,25 +484,27 @@ def iterate_node_synonyms(
 
 
 def iterate_node_properties(
-    data: Mapping[str, Any], *, property_prefix: str | None = None, term=None
-) -> Iterable[tuple[str, str]]:
+    data: Mapping[str, Any], *, term=None
+) -> Iterable[tuple[str, str, bool]]:  # (property, value, is_literal)
     """Extract properties from a :mod:`obonet` node's data."""
     for prop_value_type in data.get("property_value", []):
         try:
             prop, value_type = prop_value_type.split(" ", 1)
         except ValueError:
             logger.info("malformed property: %s on %s", prop_value_type, term and term.curie)
             continue
-        if property_prefix is not None and prop.startswith(property_prefix):
-            prop = prop[len(property_prefix) :]
-
         try:
             value, _ = value_type.rsplit(" ", 1)  # second entry is the value type
         except ValueError:
             # logger.debug(f'property missing datatype. defaulting to string - {prop_value_type}')
             value = value_type  # could assign type to be 'xsd:string' by default
-        value = value.strip('"')
-        yield prop, value
+
+        if value.startswith('"'):
+            # a leading double quote marks a literal; strip the quotes from both ends
+            value = value.strip('"')
+            yield prop, value, True
+        else:
+            yield prop, value, False  # unquoted values pass through unchanged
 
 
 def iterate_node_parents(

diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
@@ -268,17 +268,26 @@ def append_synonym(
         *,
         type: SynonymTypeDef | None = None,
         specificity: SynonymSpecificity | None = None,
+        provenance: list[Reference] | None = None,
     ) -> None:
         """Add a synonym."""
         if isinstance(synonym, str):
             synonym = Synonym(
-                synonym, type=type or DEFAULT_SYNONYM_TYPE, specificity=specificity or "EXACT"
+                synonym,
+                type=type or DEFAULT_SYNONYM_TYPE,
+                specificity=specificity or "EXACT",
+                provenance=provenance or [],
             )
         self.synonyms.append(synonym)
 
     def append_alt(self, alt: str | Reference) -> None:
         """Add an alternative identifier."""
         if isinstance(alt, str):
+            warnings.warn(
+                "use fully qualified reference when appending alts",
+                DeprecationWarning,
+                stacklevel=2,  # attribute the warning to the caller of append_alt
+            )
             alt = Reference(prefix=self.prefix, identifier=alt)
         self.alt_ids.append(alt)
 
@@ -293,7 +302,7 @@ def append_see_also(self, reference: ReferenceHint) -> Self:
             # a literal string. otherwise, raise the error again
             if isinstance(reference, str):
                 return self.annotate_literal(see_also, reference)
-            raise
+            raise RuntimeError from None  # this shouldn't/can't happen?
         else:
             return self.annotate_object(see_also, _reference)
 
@@ -321,6 +330,10 @@ def extend_parents(self, references: Collection[Reference]) -> None:
 
     def get_properties(self, prop) -> list[str]:
         """Get properties from the given key."""
+        # FIXME: temporary shim - reference-like keys are swapped for their `.curie`
+        # before the lookup; remove this check once property handling is improved
+        if isinstance(prop, curies.Reference | Referenced):
+            prop = prop.curie
         return self.properties[prop]
 
     def get_property(self, prop) -> str | None:
@@ -349,7 +362,6 @@ def append_exact_match(self, reference: ReferenceHint) -> Self:
         """Append an exact match, also adding an xref."""
         reference = _ensure_ref(reference)
         self.annotate_object(exact_match, reference)
-        self.append_xref(reference)
         return self
 
     def append_xref(self, reference: ReferenceHint) -> None:
@@ -458,6 +470,11 @@ def iterate_obo_lines(
 
         if self.definition:
             yield f"def: {self._definition_fp()}"
+        elif self.provenance:
+            logger.warning("%s has provenance but no definition, can't write", self.curie)
+
+        for alt in sorted(self.alt_ids):
+            yield f"alt_id: {alt}"  # __str__ bakes in the ! name
 
         for xref in sorted(self.xrefs):
             yield f"xref: {xref}"  # __str__ bakes in the ! name
@@ -479,17 +496,16 @@ def iterate_obo_lines(
     def _emit_relations(
         self, ontology_prefix: str, typedefs: dict[ReferenceTuple, TypeDef]
     ) -> Iterable[str]:
-        for typedef, references in sorted(self.relationships.items()):
+        for typedef, reference in self.iterate_relations():
             _typedef_warn(ontology_prefix, typedef.reference, typedefs)
-            for reference in sorted(references):
-                s = f"relationship: {typedef.preferred_curie} {reference.preferred_curie}"
-                if typedef.name or reference.name:
-                    s += " !"
+            s = f"relationship: {typedef.preferred_curie} {reference.preferred_curie}"
+            if typedef.name or reference.name:  # the per-name checks below nest under this
+                s += " !"
                 if typedef.name:
                     s += f" {typedef.name}"
                 if reference.name:
                     s += f" {reference.name}"
-                yield s
+            yield s
 
     def _emit_properties(self, typedefs: dict[ReferenceTuple, TypeDef]) -> Iterable[str]:
         for prop, value in sorted(self.iterate_properties(), key=_sort_properties):

diff --git a/tests/test_get.py b/tests/test_get.py
@@ -191,9 +191,9 @@ def test_get_node_properties(self):
         data = self.graph.nodes["CHEBI:51990"]
         properties = list(iterate_node_properties(data))
         t_prop = "http://purl.obolibrary.org/obo/chebi/monoisotopicmass"
-        self.assertIn(t_prop, {prop for prop, value in properties})
-        self.assertEqual(1, sum(prop == t_prop for prop, value in properties))
-        value = next(value for prop, value in properties if prop == t_prop)
+        self.assertIn(t_prop, {prop for prop, value, _ in properties})
+        self.assertEqual(1, sum(prop == t_prop for prop, value, _ in properties))
+        value = next(value for prop, value, _ in properties if prop == t_prop)
         self.assertEqual("261.28318", value)
 
     def test_get_node_parents(self):