Skip to content

Commit

Permalink
Update EC, ComplexPortal, NPASS, and UniProt source
Browse files (browse the repository at this point in the history)
  • Loading branch information
cthoyt committed Jan 3, 2024
1 parent b74cda0 commit dc9ad7d
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/pyobo/identifier_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def _wrapped(prefix, *args, **kwargs):

def standardize_ec(ec: str) -> str:
"""Standardize an EC code identifier by removing all trailing dashes and dots."""
- ec = ec.strip()
+ ec = ec.strip().replace(" ", "")
for _ in range(4):
ec = ec.rstrip("-").rstrip(".")
return ec
4 changes: 4 additions & 0 deletions src/pyobo/sources/complexportal.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ def _parse_xrefs(s) -> List[Tuple[Reference, str]]:
logger.warning("xref missing (: %s", xref)
continue
note = note.rstrip(")")
+ if note.lower().startswith("rhea "):
+ note = note[len("Rhea ") :]
+ if note.lower().startswith("EC:"):
+ note = note[len("EC:") :]
try:
reference = Reference.from_curie(xref_curie)
except ValueError:
Expand Down
4 changes: 3 additions & 1 deletion src/pyobo/sources/npass.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:

# TODO check that the first is always the parent compound?
if pd.notna(pubchem_compound_ids):
- pubchem_compound_ids = pubchem_compound_ids.split(";")
+ pubchem_compound_ids = [
+ yy.strip() for xx in pubchem_compound_ids.split(";") for yy in xx.strip().split(",")
+ ]
if len(pubchem_compound_ids) > 1:
logger.debug("multiple cids for %s: %s", identifier, pubchem_compound_ids)
for pubchem_compound_id in pubchem_compound_ids:
Expand Down
10 changes: 5 additions & 5 deletions src/pyobo/sources/uniprot/uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,17 @@ class UniProtGetter(Obo):

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
- yield from iter_terms(force=force, version=self._version_or_raise)
+ yield from iter_terms(version=self._version_or_raise)


def get_obo(force: bool = False) -> Obo:
"""Get UniProt as OBO."""
return UniProtGetter(force=force)


- def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[Term]:
+ def iter_terms(version: Optional[str] = None) -> Iterable[Term]:
"""Iterate over UniProt Terms."""
- with open_reader(ensure(version=version, force=force)) as reader:
+ with open_reader(ensure(version=version)) as reader:
_ = next(reader) # header
for uniprot_id, name, taxonomy_id, _synonyms, ecs, pubmeds, pdbs in tqdm(
reader, desc="Mapping UniProt", unit_scale=True
Expand All @@ -63,11 +63,11 @@ def iter_terms(version: Optional[str] = None, force: bool = False) -> Iterable[T
yield term


- def ensure(version: Optional[str] = None, force: bool = False) -> Path:
+ def ensure(version: Optional[str] = None) -> Path:
"""Ensure the reviewed uniprot names are available."""
if version is None:
version = bioversions.get_version("uniprot")
- return RAW_MODULE.ensure(PREFIX, version, name="reviewed.tsv.gz", url=REVIEWED_URL, force=force)
+ return RAW_MODULE.ensure(PREFIX, version, name="reviewed.tsv.gz", url=REVIEWED_URL)


if __name__ == "__main__":
Expand Down

0 comments on commit dc9ad7d

Please sign in to comment.