Skip to content

Commit

Permalink
Fix reading from compressed OBOs (#256)
Browse files Browse the repository at this point in the history
This specifically makes it possible to get classyfire
  • Loading branch information
cthoyt authored Dec 2, 2024
1 parent 90decf4 commit f32868c
Showing 1 changed file with 36 additions and 13 deletions.
49 changes: 36 additions & 13 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,19 +52,26 @@ def from_obo_path(
version: str | None,
) -> Obo:
"""Get the OBO graph from a path."""
import obonet

logger.info("[%s] parsing with obonet from %s", prefix or "", path)
with open(path) as file:
graph = obonet.read_obo(
tqdm(
file,
unit_scale=True,
desc=f'[{prefix or ""}] parsing obo',
disable=None,
leave=False,
)
)
path = Path(path).expanduser().resolve()
if path.suffix.endswith(".gz"):
import gzip

logger.info("[%s] parsing gzipped OBO with obonet from %s", prefix or "<unknown>", path)
with gzip.open(path, "rt") as file:
graph = _read_obo(file, prefix)
elif path.suffix.endswith(".zip"):
import io
import zipfile

logger.info("[%s] parsing zipped OBO with obonet from %s", prefix or "<unknown>", path)
with zipfile.ZipFile(path) as zf:
with zf.open(path.name.removesuffix(".zip"), "r") as file:
content = file.read().decode("utf-8")
graph = _read_obo(io.StringIO(content), prefix)
else:
logger.info("[%s] parsing OBO with obonet from %s", prefix or "<unknown>", path)
with open(path) as file:
graph = _read_obo(file, prefix)

if prefix:
# Make sure the graph is named properly
Expand All @@ -74,6 +81,22 @@ def from_obo_path(
return from_obonet(graph, strict=strict, version=version)


def _read_obo(filelike, prefix: str | None) -> nx.MultiDiGraph:
    """Parse OBO content from an open text stream using obonet.

    :param filelike: The source of OBO text handed to the parser, wrapped in
        a tqdm progress bar before parsing.
    :param prefix: The ontology prefix shown in the progress bar label. When
        it is ``None`` or empty, the label's brackets are left empty.
    :returns: Whatever :func:`obonet.read_obo` produces for the stream.
    """
    # Imported lazily so that obonet is only loaded when parsing happens.
    from obonet import read_obo

    progress = tqdm(
        filelike,
        unit_scale=True,
        desc=f'[{prefix or ""}] parsing OBO',
        disable=None,
        leave=True,
    )
    # TODO this is the default, turn it off and see what happens
    return read_obo(progress, ignore_obsolete=True)


def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True, version: str | None = None) -> Obo:
"""Get all of the terms from a OBO graph."""
ontology_prefix_raw = graph.graph["ontology"]
Expand Down

0 comments on commit f32868c

Please sign in to comment.