-
-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve resolving of URLs via banana curation
- Loading branch information
Showing
7 changed files
with
213 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""Resolvers for CURIE (e.g., pairs of prefix and identifier).""" | ||
|
||
from typing import Mapping, Optional | ||
|
||
from .resolve import ( | ||
get_banana, get_format, get_identifiers_org_prefix, get_obofoundry_prefix, get_ols_prefix, get_pattern_re, | ||
namespace_in_lui, | ||
) | ||
|
||
# Names exported from this module by a star-import. | ||
__all__ = [ | ||
'validate', | ||
'get_providers', | ||
'get_identifiers_org_url', | ||
'get_identifiers_org_curie', | ||
'get_obofoundry_link', | ||
'get_ols_link', | ||
] | ||
|
||
|
||
def validate(prefix: str, identifier: str) -> Optional[bool]:
    """Validate the identifier against the prefix's pattern, if it exists.

    :param prefix: The prefix whose pattern is looked up with ``get_pattern_re``.
    :param identifier: The local identifier, given with or without the
        redundant namespace (the "banana") in front of it.
    :returns: ``None`` if no pattern is available for the prefix, otherwise
        whether the identifier (with the embedded namespace added when it was
        missing) matches the pattern.
    """
    pattern = get_pattern_re(prefix)
    if pattern is None:
        return None

    if namespace_in_lui(prefix):
        # Prefer the curated banana, because the embedded namespace is not
        # always just the uppercased prefix; fall back to the uppercased
        # prefix when no banana is curated.
        banana = get_banana(prefix) or prefix.upper()
        if not identifier.startswith(f'{banana}:'):
            identifier = f'{banana}:{identifier}'

    return bool(pattern.match(identifier))
|
||
|
||
def get_providers(prefix: str, identifier: str) -> Mapping[str, str]:
    """Collect every URL that can be built for the given prefix/identifier pair.

    :param prefix: The prefix part of the CURIE.
    :param identifier: The identifier part of the CURIE.
    :returns: A dictionary from provider key (``bioregistry``, ``miriam``,
        ``obofoundry``, ``ols``) to URL. A provider is left out when no
        link could be produced for it.
    """
    links = {}

    # The format string uses ``$1`` as the placeholder for the identifier.
    url_template = get_format(prefix)
    if url_template:
        links['bioregistry'] = url_template.replace('$1', identifier)

    external = (
        ('miriam', get_identifiers_org_url),
        ('obofoundry', get_obofoundry_link),
        ('ols', get_ols_link),
    )
    for name, build_link in external:
        link = build_link(prefix, identifier)
        if not link:
            continue
        links[name] = link

    return links
|
||
|
||
def get_identifiers_org_url(prefix: str, identifier: str) -> Optional[str]:
    """Build the identifiers.org URL for a prefix/identifier pair.

    :param prefix: The prefix part of the CURIE.
    :param identifier: The identifier part of the CURIE.
    :returns: ``https://identifiers.org/`` followed by the result of
        :func:`get_identifiers_org_curie`, or ``None`` when that function
        returns ``None``.
    """
    compact = get_identifiers_org_curie(prefix, identifier)
    return None if compact is None else f'https://identifiers.org/{compact}'
|
||
|
||
def get_identifiers_org_curie(prefix: str, identifier: str) -> Optional[str]:
    """Get the identifiers.org CURIE for the given CURIE.

    :param prefix: The prefix part of the CURIE.
    :param identifier: The identifier part of the CURIE, given with or without
        the redundant namespace (the "banana") in front of it.
    :returns: ``None`` if ``get_identifiers_org_prefix`` has no entry for the
        prefix. Otherwise the CURIE to append to ``https://identifiers.org/``:
        ``<identifiers.org prefix>:<identifier>`` when the namespace is not
        embedded in the local identifier, else the identifier carrying exactly
        one ``<banana>:`` in front.
    """
    miriam_prefix = get_identifiers_org_prefix(prefix)
    if miriam_prefix is None:
        return None

    if not namespace_in_lui(prefix):
        # Build the CURIE from the looked-up prefix rather than the input one.
        # NOTE(review): assumes get_identifiers_org_prefix() returns the prefix
        # as it is spelled at identifiers.org - confirm.
        return f'{miriam_prefix}:{identifier}'

    # Prefer the curated banana; fall back to the uppercased prefix.
    banana = get_banana(prefix) or prefix.upper()
    # The colon is part of the check so that an identifier which merely begins
    # with the same letters is not mistaken for one that is already prefixed.
    if identifier.startswith(f'{banana}:'):
        return identifier
    return f'{banana}:{identifier}'
|
||
|
||
def get_obofoundry_link(prefix: str, identifier: str) -> Optional[str]:
    """Build the OBO Foundry PURL for a prefix/identifier pair, if possible.

    :param prefix: The prefix part of the CURIE.
    :param identifier: The identifier part of the CURIE.
    :returns: ``http://purl.obolibrary.org/obo/<PREFIX>_<identifier>`` with the
        OBO Foundry prefix uppercased, or ``None`` when
        ``get_obofoundry_prefix`` has no entry for the prefix.
    """
    obo_prefix = get_obofoundry_prefix(prefix)
    if obo_prefix is not None:
        # NOTE(review): the prefix is always uppercased here - confirm this is
        # right for ontologies whose preferred prefix is mixed-case.
        namespace = obo_prefix.upper()
        return f'http://purl.obolibrary.org/obo/{namespace}_{identifier}'
    return None
|
||
|
||
def get_ols_link(prefix: str, identifier: str) -> Optional[str]:
    """Build the OLS term URL for a prefix/identifier pair, if possible.

    :param prefix: The prefix part of the CURIE.
    :param identifier: The identifier part of the CURIE.
    :returns: An OLS URL whose ``iri`` query parameter is the OBO Foundry link
        from :func:`get_obofoundry_link`, or ``None`` when either the OLS
        prefix or the OBO Foundry link is unavailable.
    """
    ontology = get_ols_prefix(prefix)
    term_iri = get_obofoundry_link(prefix, identifier)
    if ontology is not None and term_iri is not None:
        return f'https://www.ebi.ac.uk/ols/ontologies/{ontology}/terms?iri={term_iri}'
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
"""Tests for identifiers.org.""" | ||
|
||
import unittest | ||
|
||
import requests | ||
|
||
from bioregistry import get_identifiers_org_curie, get_identifiers_org_url | ||
|
||
|
||
class TestIdentifiersOrg(unittest.TestCase):
    """Tests for identifiers.org."""

    def test_url(self):
        """Test formatting CURIEs and URLs, then check that each URL resolves."""
        # Each case is (prefix, identifier, expected CURIE, reason for the case).
        for prefix, identifier, expected, reason in [
            ('efo', '0000400', 'efo:0000400', 'test simple concatenation'),
            ('chebi', 'CHEBI:1234', 'CHEBI:1234', 'test redundant namespace (standard)'),
            ('chebi', '1234', 'CHEBI:1234', 'test exclusion of redundant namespace (standard)'),
            (
                'mzspec',
                'PXD002255::ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2',
                'mzspec:PXD002255::ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2',
                'test simple concatenation with false banana',
            ),
            (
                'mzspec',
                'mzspec:PXD002255::ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2',
                'mzspec:PXD002255::ES_XP_Ubi_97H_HCD_349:scan:9617:LAEIYVNSSFYK/2',
                'test simple concatenation (redundant) with false banana',
            ),
        ]:
            # The reason is reported with any failure of this sub-test.
            with self.subTest(msg=reason, p=prefix, i=identifier):
                curie = get_identifiers_org_curie(prefix, identifier)
                self.assertEqual(expected, curie, msg='wrong CURIE')

                url = get_identifiers_org_url(prefix, identifier)
                self.assertEqual(f'https://identifiers.org/{curie}', url, msg='wrong URL')

                # Check that the URL resolves. The timeout makes an unresponsive
                # server fail the test instead of hanging the whole run.
                res = requests.get(url, timeout=15)
                self.assertEqual(200, res.status_code, msg=res.reason)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters