Skip to content

Commit

Permalink
Reuse Bioregistry's identifier standardization code (#258)
Browse files: browse the repository at this point in the history
  • Loading branch information
cthoyt authored Dec 3, 2024
1 parent d2af10a commit da05231
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 19 deletions.
5 changes: 1 addition & 4 deletions src/pyobo/identifier_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,9 @@ def normalize_curie(
logger.debug(f"could not split CURIE on colon: {curie}")
return None, None

# remove redundant prefix
if identifier.casefold().startswith(f"{prefix.casefold()}:"):
identifier = identifier[len(prefix) + 1 :]

norm_node_prefix = bioregistry.normalize_prefix(prefix)
if norm_node_prefix:
identifier = bioregistry.standardize_identifier(norm_node_prefix, identifier)
return ReferenceTuple(norm_node_prefix, identifier)
elif strict:
raise MissingPrefixError(curie=curie, ontology_prefix=ontology_prefix, node=node)
Expand Down
11 changes: 4 additions & 7 deletions src/pyobo/normalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,17 @@ def _iterate_synonyms_to_identifiers(
id_to_synonyms: Mapping[str, Iterable[str]],
remove_prefix: str | None = None,
) -> Iterable[tuple[str, str]]:
if remove_prefix is not None:
remove_prefix = f'{remove_prefix.lower().rstrip(":")}:'

# Add name
for identifier, name in id_to_name.items():
if remove_prefix and identifier.lower().startswith(remove_prefix):
identifier = identifier[len(remove_prefix) :]
if remove_prefix:
identifier = bioregistry.standardize_identifier(remove_prefix, identifier)

yield name, identifier

# Add synonyms
for identifier, synonyms in id_to_synonyms.items():
if remove_prefix and identifier.lower().startswith(remove_prefix):
identifier = identifier[len(remove_prefix) :]
if remove_prefix:
identifier = bioregistry.standardize_identifier(remove_prefix, identifier)

for synonym in synonyms:
# it might overwrite but this is probably always due to alternate ids
Expand Down
10 changes: 2 additions & 8 deletions src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,6 @@ def from_curie_or_uri(
if prefix is None or identifier is None:
return None

identifier = bioregistry.standardize_identifier(prefix, identifier)

if name is None and auto:
from ..api import get_name

Expand All @@ -108,12 +106,8 @@ def from_curie_or_uri(
def _escaped_identifier(self):
return obo_escape(self.identifier)

def __str__(self):
identifier_lower = self.identifier.lower()
if identifier_lower.startswith(f"{self.prefix.lower()}:"):
rv = identifier_lower
else:
rv = f"{self.preferred_prefix}:{self._escaped_identifier}"
def __str__(self) -> str:
rv = f"{self.preferred_prefix}:{self._escaped_identifier}"
if self.name:
rv = f"{rv} ! {self.name}"
return rv
Expand Down

0 comments on commit da05231

Please sign in to comment.