Skip to content

Commit

Permalink
Merge branch 'main' into consistent-caching
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Dec 4, 2024
2 parents 2257327 + 7d8b1ca commit 37299b8
Show file tree
Hide file tree
Showing 16 changed files with 1,383 additions and 695 deletions.
18 changes: 6 additions & 12 deletions src/pyobo/getters.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
)
from .identifier_utils import ParseError, wrap_norm_prefix
from .plugins import has_nomenclature_plugin, run_nomenclature_plugin
from .reader import from_obo_path
from .struct import Obo
from .utils.io import get_writer
from .utils.path import ensure_path, prefix_directory_join
Expand Down Expand Up @@ -69,6 +70,7 @@ def get_ontology(
strict: bool = True,
version: str | None = None,
robot_check: bool = True,
upgrade: bool = True,
) -> Obo:
"""Get the OBO for a given graph.
Expand All @@ -80,6 +82,9 @@ def get_ontology(
:param robot_check:
If set to false, will pass the ``--check=false`` option to ROBOT to disregard
malformed ontology components. Necessary to load some ontologies like VO.
:param upgrade:
If set to true, will automatically upgrade relationships, such as
``obo:chebi#part_of`` to ``BFO:0000051``.
:returns: An OBO object
:raises OnlyOWLError: If the OBO foundry only has an OWL document for this resource.
Expand Down Expand Up @@ -131,18 +136,7 @@ def get_ontology(
else:
raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")

from .reader import from_obo_path

obo = from_obo_path(path, prefix=prefix, strict=strict)
if version is not None:
if obo.data_version is None:
logger.warning("[%s] did not have a version, overriding with %s", obo.ontology, version)
obo.data_version = version
elif obo.data_version != version:
logger.warning(
"[%s] had version %s, overriding with %s", obo.ontology, obo.data_version, version
)
obo.data_version = version
obo = from_obo_path(path, prefix=prefix, strict=strict, version=version, upgrade=upgrade)
obo.write_default(force=force_process)
return obo

Expand Down
23 changes: 13 additions & 10 deletions src/pyobo/identifier_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from functools import wraps
from typing import ClassVar

import bioontologies.upgrade
import bioregistry
from curies import Reference, ReferenceTuple

Expand Down Expand Up @@ -74,7 +75,8 @@ def normalize_curie(
strict: bool = True,
ontology_prefix: str | None = None,
node: Reference | None = None,
) -> tuple[str, str] | tuple[None, None]:
upgrade: bool = True,
) -> ReferenceTuple | tuple[None, None]:
"""Parse a string that looks like a CURIE.
:param curie: A compact uniform resource identifier (CURIE)
Expand All @@ -85,11 +87,12 @@ def normalize_curie(
- Normalizes the namespace
- Checks against a blacklist for the entire curie, for the namespace, and for suffixes.
"""
# Remap the curie with the full list
curie = remap_full(curie)
if upgrade:
# Remap the curie with the full list
curie = remap_full(curie)

# Remap node's prefix (if necessary)
curie = remap_prefix(curie, ontology_prefix=ontology_prefix)
# Remap node's prefix (if necessary)
curie = remap_prefix(curie, ontology_prefix=ontology_prefix)

if curie_is_blacklisted(curie):
return None, None
Expand All @@ -98,6 +101,9 @@ def normalize_curie(
if curie_has_blacklisted_suffix(curie):
return None, None

if upgrade and (reference_t := bioontologies.upgrade.upgrade(curie)):
return reference_t

if curie.startswith("http:") or curie.startswith("https:"):
if reference := parse_iri(curie):
return reference.pair
Expand All @@ -114,13 +120,10 @@ def normalize_curie(
logger.debug(f"could not split CURIE on colon: {curie}")
return None, None

# remove redundant prefix
if identifier.casefold().startswith(f"{prefix.casefold()}:"):
identifier = identifier[len(prefix) + 1 :]

norm_node_prefix = bioregistry.normalize_prefix(prefix)
if norm_node_prefix:
return norm_node_prefix, identifier
identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
return ReferenceTuple(norm_node_prefix, identifier)
elif strict:
raise MissingPrefixError(curie=curie, ontology_prefix=ontology_prefix, node=node)
else:
Expand Down
11 changes: 4 additions & 7 deletions src/pyobo/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,17 @@ def _iterate_synonyms_to_identifiers(
id_to_synonyms: Mapping[str, Iterable[str]],
remove_prefix: str | None = None,
) -> Iterable[tuple[str, str]]:
if remove_prefix is not None:
remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'

# Add name
for identifier, name in id_to_name.items():
if remove_prefix and identifier.lower().startswith(remove_prefix):
identifier = identifier[len(remove_prefix) :]
if remove_prefix:
identifier = bioregistry.standardize_identifier(remove_prefix, identifier)

yield name, identifier

# Add synonyms
for identifier, synonyms in id_to_synonyms.items():
if remove_prefix and identifier.lower().startswith(remove_prefix):
identifier = identifier[len(remove_prefix) :]
if remove_prefix:
identifier = bioregistry.standardize_identifier(remove_prefix, identifier)

for synonym in synonyms:
# it might overwrite but this is probably always due to alternate ids
Expand Down
Loading

0 comments on commit 37299b8

Please sign in to comment.