Skip to content

Commit

Permalink
Improve ROR synonym generation
Browse files Browse the repository at this point in the history
1. Get labels (which is a different field)
2. Remove "the" from the beginning of terms
  • Loading branch information
cthoyt committed Nov 22, 2023
1 parent ce8af02 commit c7585aa
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion src/pyobo/sources/ror.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import bioregistry
import zenodo_client
from tqdm import tqdm
from tqdm.auto import tqdm

from pyobo.struct import Obo, Reference, SynonymTypeDef, Term, TypeDef

Expand Down Expand Up @@ -80,6 +80,9 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
)
term.append_parent(ORG_CLASS)

if name.startswith("The "):
term.append_synonym(name.removeprefix("The "))

for relationship in record.get("relationships", []):
target_id = relationship["id"].removeprefix("https://ror.org/")
term.append_relationship(
Expand All @@ -96,8 +99,16 @@ def iterate_ror_terms(*, force: bool = False) -> Iterable[Term]:
RMAP["Located in"], Reference(prefix="geonames", identifier=str(city["id"]))
)

for label in record.get("labels", []):
label = label["label"] # there's a language available in this dict too
term.append_synonym(label)
if label.startswith("The "):
term.append_synonym(label.removeprefix("The "))

for synonym in record.get("aliases", []):
term.append_synonym(synonym)
if synonym.startswith("The "):
term.append_synonym(synonym.removeprefix("The "))

for acronym in record.get("acronyms", []):
term.append_synonym(acronym, type=ACRONYM)
Expand Down

0 comments on commit c7585aa

Please sign in to comment.