Skip to content

Commit

Permalink
Add GARD as a source (#262)
Browse files Browse the repository at this point in the history
Closes #260
  • Loading branch information
cthoyt authored Dec 3, 2024
1 parent 37bb8d4 commit a84c6d4
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/pyobo/sources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .expasy import ExpasyGetter
from .famplex import FamPlexGetter
from .flybase import FlyBaseGetter
from .gard import GARDGetter
from .geonames import GeonamesGetter
from .gtdb import GTDBGetter
from .gwascentral_phenotype import GWASCentralPhenotypeGetter
Expand Down Expand Up @@ -79,6 +80,7 @@
"ExpasyGetter",
"FamPlexGetter",
"FlyBaseGetter",
"GARDGetter",
"GTDBGetter",
"GWASCentralPhenotypeGetter",
"GWASCentralStudyGetter",
Expand Down
60 changes: 60 additions & 0 deletions src/pyobo/sources/gard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Converter for GARD."""

from collections.abc import Iterable

import requests

from pyobo.struct import Obo, Term, default_reference

# Names exported by this module.
__all__ = [
    "GARDGetter",
]

# Prefix used for the ontology itself and for every reference built in get_terms().
PREFIX = "gard"
# NOTE(review): not referenced anywhere in the visible module; presumably intended
# as a dedicated prefix for category terms - confirm or remove.
PP = "gard.category"
# JSON document that get_terms() downloads and converts into terms.
URL = "https://rarediseases.info.nih.gov/assets/diseases.trimmed.json"


class GARDGetter(Obo):
    """Expose the GARD disease listing as an :class:`Obo` ontology."""

    # Both keys share the module-level GARD prefix.
    bioversions_key = PREFIX
    ontology = PREFIX
    dynamic_version = True

    def iter_terms(self, force: bool = False) -> Iterable[Term]:
        """Yield all GARD terms by delegating to :func:`get_terms`.

        :param force: accepted as part of the method signature; this
            implementation does not use it.
        :returns: an iterable over the terms produced by :func:`get_terms`.
        """
        yield from get_terms()


def get_terms() -> Iterable[Term]:
    """Get GARD terms.

    Downloads the JSON listing at :data:`URL` and converts it into terms in two
    passes: first one term per disease category (plus a fallback
    ``uncategorized`` category), then one term per row of the listing, each
    linked to its categories as parents.

    Because this is a generator, the download (and any error it raises) happens
    when iteration starts, not when the function is called.

    :returns: an iterable of category terms followed by one term per row.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    response = requests.get(URL, timeout=5)
    # Fail with a clear HTTP error instead of trying to parse an error page as JSON.
    response.raise_for_status()
    rows = response.json()

    uncategorized = "uncategorized"

    # Deduplicate category names first (keeping first-seen order) so each
    # reference is built once. ``or []`` also covers an explicit JSON ``null``,
    # which a plain ``.get(key, [])`` default would not.
    category_names = dict.fromkeys(
        category for row in rows for category in row.get("diseaseCategories") or []
    )
    categories = {
        category: default_reference(
            prefix=PREFIX, identifier=category.lower().replace(" ", "_"), name=category
        )
        for category in category_names
    }
    categories[uncategorized] = default_reference(
        prefix=PREFIX, identifier=uncategorized, name="Uncategorized Disease"
    )
    for category_reference in categories.values():
        yield Term(reference=category_reference)

    # The "encodedName", "spanishId" and "spanishName" fields of each row are
    # intentionally not used.
    for row in rows:
        term = Term.from_triple(PREFIX, identifier=str(row["id"]), name=row["name"])
        for synonym in row.get("synonyms") or []:
            # Assumes synonyms are plain strings; skip blank entries so no
            # empty synonym is attached to the term.
            synonym = synonym.strip()
            if synonym:
                term.append_synonym(synonym)
        # A missing, null, or empty category list all fall back to the
        # "uncategorized" parent so every row has at least one parent.
        for category in row.get("diseaseCategories") or [uncategorized]:
            term.append_parent(categories[category])

        yield term


if __name__ == "__main__":
    # Script entry point: export GARD through write_default, requesting both
    # OBO and OWL output with force=True.
    getter = GARDGetter()
    getter.write_default(force=True, write_obo=True, write_owl=True)

0 comments on commit a84c6d4

Please sign in to comment.