From ae40806b0fc2271525a57b0cdf050c27b73ba508 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Mon, 25 Nov 2024 12:19:35 +0100 Subject: [PATCH] Simplify construction of synonyms in PyOBO sources (#247) --- src/pyobo/obographs.py | 3 +-- src/pyobo/sources/dictybase_gene.py | 4 ++-- src/pyobo/sources/hgnc.py | 9 ++++----- src/pyobo/sources/hgncgenefamily.py | 3 +-- src/pyobo/sources/mgi.py | 3 +-- src/pyobo/sources/npass.py | 4 ++-- src/pyobo/sources/rgd.py | 5 ++--- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/pyobo/obographs.py b/src/pyobo/obographs.py index c3c21ddf..ccd6b108 100644 --- a/src/pyobo/obographs.py +++ b/src/pyobo/obographs.py @@ -91,12 +91,11 @@ def _get_class_node(term: Term) -> Node: ) for xref, xref_type in zip(term.xrefs, term.xref_types, strict=False) ] - default_st = Reference(prefix="oboInOwl", identifier="SynonymType") synonyms = [ Synonym.from_parsed( name=synonym.name, predicate=OIO_TO_REFERENCE[OBO_SYNONYM_TO_OIO[synonym.specificity]], - synonym_type=_rewire(synonym.type.reference) if synonym.type else default_st, + synonym_type=_rewire(synonym.type) if synonym.type else None, references=[_rewire(x) for x in synonym.provenance], ) for synonym in term.synonyms diff --git a/src/pyobo/sources/dictybase_gene.py b/src/pyobo/sources/dictybase_gene.py index a742a32b..b2a3c9ea 100644 --- a/src/pyobo/sources/dictybase_gene.py +++ b/src/pyobo/sources/dictybase_gene.py @@ -9,7 +9,7 @@ import pandas as pd from tqdm.auto import tqdm -from pyobo.struct import Obo, Synonym, Term, from_species, has_gene_product +from pyobo.struct import Obo, Term, from_species, has_gene_product from pyobo.utils.path import ensure_df __all__ = [ @@ -67,7 +67,7 @@ def get_terms(force: bool = False) -> Iterable[Term]: term.append_synonym(synonym.strip()) if synonyms and pd.notna(synonyms): for synonym in synonyms.split(","): - term.append_synonym(Synonym(synonym.strip())) + term.append_synonym(synonym.strip()) # for uniprot_id in uniprot_mappings.get(identifier, []): # if not uniprot_id or pd.isna(uniprot_id) or uniprot_id in {"unknown", "pseudogene"}: # continue diff --git a/src/pyobo/sources/hgnc.py b/src/pyobo/sources/hgnc.py index e7771009..6fee10cd 100644 --- a/src/pyobo/sources/hgnc.py +++ b/src/pyobo/sources/hgnc.py @@ -15,7 +15,6 @@ from pyobo.struct import ( Obo, Reference, - Synonym, SynonymTypeDef, Term, TypeDef, @@ -412,15 +411,15 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term] ) for alias_symbol in entry.pop("alias_symbol", []): - term.append_synonym(Synonym(name=alias_symbol, type=alias_symbol_type)) + term.append_synonym(alias_symbol, type=alias_symbol_type) for alias_name in entry.pop("alias_name", []): - term.append_synonym(Synonym(name=alias_name, type=alias_name_type)) + term.append_synonym(alias_name, type=alias_name_type) for previous_symbol in itt.chain( entry.pop("previous_symbol", []), entry.pop("prev_symbol", []) ): - term.append_synonym(Synonym(name=previous_symbol, type=previous_symbol_type)) + term.append_synonym(previous_symbol, type=previous_symbol_type) for previous_name in entry.pop("prev_name", []): - term.append_synonym(Synonym(name=previous_name, type=previous_name_type)) + term.append_synonym(previous_name, type=previous_name_type) for prop, td in [("location", HAS_LOCATION)]: value = entry.pop(prop, None) diff --git a/src/pyobo/sources/hgncgenefamily.py b/src/pyobo/sources/hgncgenefamily.py index 6a44fc02..7365ffdb 100644 --- a/src/pyobo/sources/hgncgenefamily.py +++ b/src/pyobo/sources/hgncgenefamily.py @@ -8,7 +8,6 @@ from ..struct import ( Obo, Reference, - Synonym, SynonymTypeDef, Term, enables, @@ -104,7 +103,7 @@ def _get_terms_helper(force: bool = False) -> Iterable[Term]: go_id = desc_go[len("http://purl.uniprot.org/go/") :] term.append_relationship(enables, Reference(prefix="GO", identifier=go_id)) if symbol and pd.notna(symbol): - term.append_synonym(Synonym(name=symbol, type=symbol_type)) + term.append_synonym(symbol, type=symbol_type) term.set_species(identifier="9606", name="Homo sapiens") yield term diff --git a/src/pyobo/sources/mgi.py b/src/pyobo/sources/mgi.py index c39c83b1..941e0fec 100644 --- a/src/pyobo/sources/mgi.py +++ b/src/pyobo/sources/mgi.py @@ -12,7 +12,6 @@ from ..struct import ( Obo, Reference, - Synonym, Term, from_species, has_gene_product, @@ -158,7 +157,7 @@ def get_terms(force: bool = False) -> Iterable[Term]: ) if identifier in mgi_to_synonyms: for synonym in mgi_to_synonyms[identifier]: - term.append_synonym(Synonym(name=synonym)) + term.append_synonym(synonym) if identifier in mgi_to_entrez_id: term.append_exact_match( Reference(prefix="ncbigene", identifier=mgi_to_entrez_id[identifier]) diff --git a/src/pyobo/sources/npass.py b/src/pyobo/sources/npass.py index 3fd21d51..085f5cc4 100644 --- a/src/pyobo/sources/npass.py +++ b/src/pyobo/sources/npass.py @@ -6,7 +6,7 @@ import pandas as pd from tqdm.auto import tqdm -from ..struct import Obo, Reference, Synonym, Term +from ..struct import Obo, Reference, Term from ..utils.path import ensure_df __all__ = [ @@ -82,7 +82,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]: for synonym in [iupac]: if pd.notna(synonym): - term.append_synonym(Synonym(name=synonym)) + term.append_synonym(synonym) yield term diff --git a/src/pyobo/sources/rgd.py b/src/pyobo/sources/rgd.py index 99ebafab..75a4595c 100644 --- a/src/pyobo/sources/rgd.py +++ b/src/pyobo/sources/rgd.py @@ -9,7 +9,6 @@ from pyobo.struct import ( Obo, Reference, - Synonym, SynonymTypeDef, Term, default_reference, @@ -124,11 +123,11 @@ def get_terms(force: bool = False, version: str | None = None) -> Iterable[Term] old_names = row["OLD_NAME"] if old_names and pd.notna(old_names): for old_name in old_names.split(";"): - term.append_synonym(Synonym(name=old_name, type=old_name_type)) + term.append_synonym(old_name, type=old_name_type) old_symbols = row["OLD_SYMBOL"] if old_symbols and pd.notna(old_symbols): for old_symbol in old_symbols.split(";"): - term.append_synonym(Synonym(name=old_symbol, type=old_symbol_type)) + term.append_synonym(old_symbol, type=old_symbol_type) for prefix, key in namespace_to_column: xref_ids = str(row[key]) if xref_ids and pd.notna(xref_ids):