Skip to content

Commit

Permalink
Add explicit typedefs for GeoNames and ICD10 (#240)
Browse files (browse the repository at this point in the history)
  • Loading branch information
cthoyt authored Nov 23, 2024
1 parent df343f4 commit fc8f639
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
11 changes: 6 additions & 5 deletions src/pyobo/sources/geonames.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pystow.utils import read_zipfile_csv
from tqdm import tqdm

-from pyobo import Obo, Term
+from pyobo import Obo, Term, TypeDef
from pyobo.struct import Reference, part_of
from pyobo.utils.path import ensure_df, ensure_path

Expand All @@ -22,14 +22,15 @@
ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
+CODE_TYPEDEF = TypeDef.default(PREFIX, "code")


class GeonamesGetter(Obo):
"""An ontology representation of GeoNames."""

ontology = PREFIX
dynamic_version = True
-typedefs = [part_of]
+typedefs = [part_of, CODE_TYPEDEF]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down Expand Up @@ -80,7 +81,7 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
term.append_synonym(fips)
if pd.notna(iso3):
term.append_synonym(iso3)
-term.annotate_literal("code", code)
+term.annotate_literal(CODE_TYPEDEF, code)
code_to_country[code] = term
logger.info(f"got {len(code_to_country):,} country records")
return code_to_country
Expand All @@ -107,7 +108,7 @@ def get_code_to_admin1(
term = Term.from_triple(
"geonames", identifier, name if pd.notna(name) else None, type="Instance"
)
-term.annotate_literal("code", code)
+term.annotate_literal(CODE_TYPEDEF, code)
code_to_admin1[code] = term

country_code = code.split(".")[0]
Expand Down Expand Up @@ -135,7 +136,7 @@ def get_code_to_admin2(
term = Term.from_triple(
"geonames", identifier, name if pd.notna(name) else None, type="Instance"
)
-term.annotate_literal("code", code)
+term.annotate_literal(CODE_TYPEDEF, code)
code_to_admin2[code] = term
admin1_code = code.rsplit(".", 1)[0]
admin1_term = code_to_admin1.get(admin1_code)
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/icd10.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
get_icd,
visiter,
)
-from ..struct import Obo, Reference, Synonym, Term
+from ..struct import Obo, Reference, Synonym, Term, has_category
from ..utils.path import prefix_directory_join

__all__ = [
Expand All @@ -35,6 +35,7 @@ class ICD10Getter(Obo):

ontology = PREFIX
dynamic_version = True
+typedefs = [has_category]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down Expand Up @@ -81,8 +82,7 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
synonyms=synonyms,
parents=parents,
)
-
-rv.annotate_literal("class_kind", res_json["classKind"])
+rv.annotate_literal(has_category, res_json["classKind"])

return rv

Expand Down
2 changes: 2 additions & 0 deletions src/pyobo/struct/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
enables,
from_species,
gene_product_member_of,
+has_category,
has_gene_product,
has_member,
has_part,
Expand Down Expand Up @@ -47,6 +48,7 @@
"enables",
"from_species",
"gene_product_member_of",
+"has_category",
"has_gene_product",
"has_member",
"has_part",
Expand Down

0 comments on commit fc8f639

Please sign in to comment.