Skip to content

Commit

Permalink
Enable outputting slim OBO (#202)
Browse files Browse the repository at this point in the history
References biopragmatics/obo-db-ingest#13

A demonstration of the results is in
biopragmatics/obo-db-ingest#12

This PR enables serializing to OBO while skipping object properties, as
requested by @cmungall.
  • Loading branch information
cthoyt authored Nov 5, 2024
1 parent b849b89 commit 989cd88
Showing 1 changed file with 44 additions and 8 deletions.
52 changes: 44 additions & 8 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,14 @@ def iterate_properties(self) -> Iterable[tuple[str, str]]:
for value in sorted(values):
yield prop, value

def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
def iterate_obo_lines(
self,
*,
ontology: str,
typedefs: list[TypeDef] | None = None,
emit_object_properties: bool = True,
emit_annotation_properties: bool = True,
) -> Iterable[str]:
"""Iterate over the lines to write in an OBO file."""
yield f"\n[{self.type}]"
yield f"id: {self.preferred_curie}"
Expand All @@ -423,6 +430,18 @@ def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
for parent in sorted(self.parents, key=attrgetter("prefix", "identifier")):
yield f"{parent_tag}: {parent}" # __str__ bakes in the ! name

if emit_object_properties:
yield from self._emit_relations(ontology, typedefs)

if emit_annotation_properties:
yield from self._emit_properties()

for synonym in sorted(self.synonyms, key=attrgetter("name")):
yield synonym.to_obo()

def _emit_relations(
self, ontology: str, typedefs: list[TypeDef] | None = None
) -> Iterable[str]:
for typedef, references in sorted(self.relationships.items(), key=_sort_relations):
if (not typedefs or typedef not in typedefs) and (
ontology,
Expand All @@ -442,13 +461,11 @@ def iterate_obo_lines(self, *, ontology, typedefs) -> Iterable[str]:
s += f" {reference.name}"
yield s

def _emit_properties(self) -> Iterable[str]:
    """Yield one OBO ``property_value`` line per annotation property.

    Pairs come from :meth:`iterate_properties` and are emitted in the order
    given by ``_sort_properties``. Each value is written as a double-quoted
    ``xsd:string`` literal.

    :returns: An iterable of ``property_value: <prop> "<value>" xsd:string`` lines.
    """
    for prop, value in sorted(self.iterate_properties(), key=_sort_properties):
        # TODO deal with typedefs for properties
        # The value sits inside a double-quoted literal on a single line, so
        # embedded quotes and newlines must be escaped or the emitted line is
        # malformed.
        escaped_value = self._escape(value)
        yield f'property_value: {prop} "{escaped_value}" xsd:string'  # TODO deal with types later

for synonym in sorted(self.synonyms, key=attrgetter("name")):
yield synonym.to_obo()

@staticmethod
def _escape(s) -> str:
return s.replace("\n", "\\n").replace('"', '\\"')
Expand Down Expand Up @@ -659,7 +676,11 @@ def _iter_terms(self, use_tqdm: bool = False, desc: str = "terms") -> Iterable[T
else:
yield from self

def iterate_obo_lines(self) -> Iterable[str]:
def iterate_obo_lines(
self,
emit_object_properties: bool = True,
emit_annotation_properties: bool = True,
) -> Iterable[str]:
"""Iterate over the lines to write in an OBO file."""
yield f"format-version: {self.format_version}"

Expand Down Expand Up @@ -700,11 +721,26 @@ def iterate_obo_lines(self) -> Iterable[str]:
yield from typedef.iterate_obo_lines()

for term in self:
yield from term.iterate_obo_lines(ontology=self.ontology, typedefs=self.typedefs)
yield from term.iterate_obo_lines(
ontology=self.ontology,
typedefs=self.typedefs,
emit_object_properties=emit_object_properties,
emit_annotation_properties=emit_annotation_properties,
)

def write_obo(self, file: None | str | TextIO | Path = None, use_tqdm: bool = False) -> None:
def write_obo(
self,
file: None | str | TextIO | Path = None,
*,
use_tqdm: bool = False,
emit_object_properties: bool = True,
emit_annotation_properties: bool = True,
) -> None:
"""Write the OBO to a file."""
it = self.iterate_obo_lines()
it = self.iterate_obo_lines(
emit_object_properties=emit_object_properties,
emit_annotation_properties=emit_annotation_properties,
)
if use_tqdm:
it = tqdm(it, desc=f"Writing {self.ontology}", unit_scale=True, unit="line")
if isinstance(file, str | Path | os.PathLike):
Expand Down

0 comments on commit 989cd88

Please sign in to comment.