From a84c6d476863b7320297d109db594704162586d3 Mon Sep 17 00:00:00 2001
From: Charles Tapley Hoyt
Date: Tue, 3 Dec 2024 21:47:01 +0100
Subject: [PATCH] Add GARD as a source (#262)

Closes #260
---
 src/pyobo/sources/__init__.py |  2 ++
 src/pyobo/sources/gard.py     | 72 +++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100644 src/pyobo/sources/gard.py

diff --git a/src/pyobo/sources/__init__.py b/src/pyobo/sources/__init__.py
index 29e4cdc4..22908b40 100644
--- a/src/pyobo/sources/__init__.py
+++ b/src/pyobo/sources/__init__.py
@@ -20,6 +20,7 @@
 from .expasy import ExpasyGetter
 from .famplex import FamPlexGetter
 from .flybase import FlyBaseGetter
+from .gard import GARDGetter
 from .geonames import GeonamesGetter
 from .gtdb import GTDBGetter
 from .gwascentral_phenotype import GWASCentralPhenotypeGetter
@@ -79,6 +80,7 @@
     "ExpasyGetter",
     "FamPlexGetter",
     "FlyBaseGetter",
+    "GARDGetter",
     "GTDBGetter",
     "GWASCentralPhenotypeGetter",
     "GWASCentralStudyGetter",
diff --git a/src/pyobo/sources/gard.py b/src/pyobo/sources/gard.py
new file mode 100644
index 00000000..032501b0
--- /dev/null
+++ b/src/pyobo/sources/gard.py
@@ -0,0 +1,72 @@
+"""Converter for GARD."""
+
+from collections.abc import Iterable
+
+import requests
+
+from pyobo.struct import Obo, Term, default_reference
+
+__all__ = [
+    "GARDGetter",
+]
+
+PREFIX = "gard"
+PP = "gard.category"
+URL = "https://rarediseases.info.nih.gov/assets/diseases.trimmed.json"
+UNCATEGORIZED = "uncategorized"
+
+
+class GARDGetter(Obo):
+    """An ontology representation of GARD."""
+
+    bioversions_key = ontology = PREFIX
+    dynamic_version = True
+
+    def iter_terms(self, force: bool = False) -> Iterable[Term]:
+        """Iterate over disease and disease category terms for GARD.
+
+        :param force: Unused; the data is downloaded on every call.
+        """
+        yield from get_terms()
+
+
+def get_terms() -> Iterable[Term]:
+    """Get GARD terms.
+
+    Downloads the disease listing from ``URL``, then yields one term per disease
+    category followed by one term per disease. Each disease gets its categories
+    as parents, or the "uncategorized" term if it has none.
+
+    :raises requests.HTTPError: if the download returns an error status
+    """
+    res = requests.get(URL, timeout=5)
+    # Fail with the HTTP status rather than a JSON parsing error on an error page.
+    res.raise_for_status()
+    rows = res.json()
+
+    categories = {}
+    for row in rows:
+        # ``or`` also covers a key that is present but null or empty.
+        for category in row.get("diseaseCategories") or []:
+            categories[category] = default_reference(
+                prefix=PREFIX, identifier=category.lower().replace(" ", "_"), name=category
+            )
+    categories[UNCATEGORIZED] = default_reference(
+        prefix=PREFIX, identifier=UNCATEGORIZED, name="Uncategorized Disease"
+    )
+    for category_reference in categories.values():
+        yield Term(reference=category_reference)
+
+    # The encodedName, spanishId, and spanishName fields are not used.
+    for row in rows:
+        term = Term.from_triple(PREFIX, identifier=str(row["id"]), name=row["name"])
+        for synonym in row.get("synonyms") or []:
+            term.append_synonym(synonym)
+        # A disease without categories is filed under the catch-all parent.
+        for category in row.get("diseaseCategories") or [UNCATEGORIZED]:
+            term.append_parent(categories[category])
+        yield term
+
+
+if __name__ == "__main__":
+    GARDGetter().write_default(write_obo=True, write_owl=True, force=True)